/////////////////////////////////////////////////////////////////////////////// // /// \file file_io.c /// \brief File opening, unlinking, and closing // // Author: Lasse Collin // // This file has been put into the public domain. // You can do whatever you want with this file. // /////////////////////////////////////////////////////////////////////////////// #include "private.h" #include #ifdef TUKLIB_DOSLIKE # include #else # include static bool warn_fchown; #endif #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) # include #elif defined(HAVE__FUTIME) # include #elif defined(HAVE_UTIME) # include #endif #ifdef HAVE_CAPSICUM # ifdef HAVE_SYS_CAPSICUM_H # include # else # include # endif #endif #include "tuklib_open_stdxxx.h" #ifndef O_BINARY # define O_BINARY 0 #endif #ifndef O_NOCTTY # define O_NOCTTY 0 #endif // Using this macro to silence a warning from gcc -Wlogical-op. #if EAGAIN == EWOULDBLOCK # define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN) #else # define IS_EAGAIN_OR_EWOULDBLOCK(e) \ ((e) == EAGAIN || (e) == EWOULDBLOCK) #endif typedef enum { IO_WAIT_MORE, // Reading or writing is possible. IO_WAIT_ERROR, // Error or user_abort IO_WAIT_TIMEOUT, // poll() timed out } io_wait_ret; /// If true, try to create sparse files when decompressing. static bool try_sparse = true; #ifdef ENABLE_SANDBOX /// True if the conditions for sandboxing (described in main()) have been met. static bool sandbox_allowed = false; #endif #ifndef TUKLIB_DOSLIKE /// File status flags of standard input. This is used by io_open_src() /// and io_close_src(). static int stdin_flags; static bool restore_stdin_flags = false; /// Original file status flags of standard output. This is used by /// io_open_dest() and io_close_dest() to save and restore the flags. static int stdout_flags; static bool restore_stdout_flags = false; /// Self-pipe used together with the user_abort variable to avoid /// race conditions with signal handling. static int user_abort_pipe[2]; #endif static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size); extern void io_init(void) { // Make sure that stdin, stdout, and stderr are connected to // a valid file descriptor. Exit immediately with exit code ERROR // if we cannot make the file descriptors valid. Maybe we should // print an error message, but our stderr could be screwed anyway. tuklib_open_stdxxx(E_ERROR); #ifndef TUKLIB_DOSLIKE // If fchown() fails setting the owner, we warn about it only if // we are root. warn_fchown = geteuid() == 0; // Create a pipe for the self-pipe trick. if (pipe(user_abort_pipe)) message_fatal(_("Error creating a pipe: %s"), strerror(errno)); // Make both ends of the pipe non-blocking. for (unsigned i = 0; i < 2; ++i) { int flags = fcntl(user_abort_pipe[i], F_GETFL); if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL, flags | O_NONBLOCK) == -1) message_fatal(_("Error creating a pipe: %s"), strerror(errno)); } #endif #ifdef __DJGPP__ // Avoid doing useless things when statting files. // This isn't important but doesn't hurt. _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE; #endif return; } #ifndef TUKLIB_DOSLIKE extern void io_write_to_user_abort_pipe(void) { // If the write() fails, it's probably due to the pipe being full. // Failing in that case is fine. If the reason is something else, // there's not much we can do since this is called in a signal // handler. So ignore the errors and try to avoid warnings with // GCC and glibc when _FORTIFY_SOURCE=2 is used. uint8_t b = '\0'; const ssize_t ret = write(user_abort_pipe[1], &b, 1); (void)ret; return; } #endif extern void io_no_sparse(void) { try_sparse = false; return; } #ifdef ENABLE_SANDBOX extern void io_allow_sandbox(void) { sandbox_allowed = true; return; } /// Enables operating-system-specific sandbox if it is possible. /// src_fd is the file descriptor of the input file. static void io_sandbox_enter(int src_fd) { if (!sandbox_allowed) { // This message is more often annoying than useful so // it's commented out. It can be useful when developing // the sandboxing code. //message(V_DEBUG, _("Sandbox is disabled due " // "to incompatible command line arguments")); return; } const char dummy_str[] = "x"; // Try to ensure that both libc and xz locale files have been // loaded when NLS is enabled. snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL)); // Try to ensure that iconv data files needed for handling multibyte // characters have been loaded. This is needed at least with glibc. tuklib_mbstr_width(dummy_str, NULL); #ifdef HAVE_CAPSICUM // Capsicum needs FreeBSD 10.0 or later. cap_rights_t rights; if (cap_enter()) goto error; if (cap_rights_limit(src_fd, cap_rights_init(&rights, CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK))) goto error; // If not reading from stdin, remove all capabilities from it. if (src_fd != STDIN_FILENO && cap_rights_limit( STDIN_FILENO, cap_rights_clear(&rights))) goto error; if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights, CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP, CAP_WRITE, CAP_SEEK))) goto error; if (cap_rights_limit(STDERR_FILENO, cap_rights_init(&rights, CAP_WRITE))) goto error; if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights, CAP_EVENT))) goto error; if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights, CAP_WRITE))) goto error; #elif defined(HAVE_PLEDGE) // pledge() was introduced in OpenBSD 5.9. // // main() unconditionally calls pledge() with fairly relaxed // promises which work in all situations. Here we make the // sandbox more strict. if (pledge("stdio", "")) goto error; (void)src_fd; #else # error ENABLE_SANDBOX is defined but no sandboxing method was found. #endif // This message is annoying in xz -lvv. //message(V_DEBUG, _("Sandbox was successfully enabled")); return; error: #ifdef HAVE_CAPSICUM // If a kernel is configured without capability mode support or // used in an emulator that does not implement the capability // system calls, then the Capsicum system calls will fail and set // errno to ENOSYS. In that case xz will silently run without // the sandbox. if (errno == ENOSYS) return; #endif message_fatal(_("Failed to enable the sandbox")); } #endif // ENABLE_SANDBOX #ifndef TUKLIB_DOSLIKE /// \brief Waits for input or output to become available or for a signal /// /// This uses the self-pipe trick to avoid a race condition that can occur /// if a signal is caught after user_abort has been checked but before e.g. /// read() has been called. In that situation read() could block unless /// non-blocking I/O is used. With non-blocking I/O something like select() /// or poll() is needed to avoid a busy-wait loop, and the same race condition /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in /// POSIX) but neither is portable enough in 2013. The self-pipe trick is /// old and very portable. static io_wait_ret io_wait(file_pair *pair, int timeout, bool is_reading) { struct pollfd pfd[2]; if (is_reading) { pfd[0].fd = pair->src_fd; pfd[0].events = POLLIN; } else { pfd[0].fd = pair->dest_fd; pfd[0].events = POLLOUT; } pfd[1].fd = user_abort_pipe[0]; pfd[1].events = POLLIN; while (true) { const int ret = poll(pfd, 2, timeout); if (user_abort) return IO_WAIT_ERROR; if (ret == -1) { if (errno == EINTR || errno == EAGAIN) continue; message_error(_("%s: poll() failed: %s"), is_reading ? pair->src_name : pair->dest_name, strerror(errno)); return IO_WAIT_ERROR; } if (ret == 0) return IO_WAIT_TIMEOUT; if (pfd[0].revents != 0) return IO_WAIT_MORE; } } #endif /// \brief Unlink a file /// /// This tries to verify that the file being unlinked really is the file that /// we want to unlink by verifying device and inode numbers. There's still /// a small unavoidable race, but this is much better than nothing (the file /// could have been moved/replaced even hours earlier). static void io_unlink(const char *name, const struct stat *known_st) { #if defined(TUKLIB_DOSLIKE) // On DOS-like systems, st_ino is meaningless, so don't bother // testing it. Just silence a compiler warning. (void)known_st; #else struct stat new_st; // If --force was used, use stat() instead of lstat(). This way // (de)compressing symlinks works correctly. However, it also means // that xz cannot detect if a regular file foo is renamed to bar // and then a symlink foo -> bar is created. Because of stat() // instead of lstat(), xz will think that foo hasn't been replaced // with another file. Thus, xz will remove foo even though it no // longer is the same file that xz used when it started compressing. // Probably it's not too bad though, so this doesn't need a more // complex fix. const int stat_ret = opt_force ? stat(name, &new_st) : lstat(name, &new_st); if (stat_ret # ifdef __VMS // st_ino is an array, and we don't want to // compare st_dev at all. || memcmp(&new_st.st_ino, &known_st->st_ino, sizeof(new_st.st_ino)) != 0 # else // Typical POSIX-like system || new_st.st_dev != known_st->st_dev || new_st.st_ino != known_st->st_ino # endif ) // TRANSLATORS: When compression or decompression finishes, // and xz is going to remove the source file, xz first checks // if the source file still exists, and if it does, does its // device and inode numbers match what xz saw when it opened // the source file. If these checks fail, this message is // shown, %s being the filename, and the file is not deleted. // The check for device and inode numbers is there, because // it is possible that the user has put a new file in place // of the original file, and in that case it obviously // shouldn't be removed. message_warning(_("%s: File seems to have been moved, " "not removing"), name); else #endif // There's a race condition between lstat() and unlink() // but at least we have tried to avoid removing wrong file. if (unlink(name)) message_warning(_("%s: Cannot remove: %s"), name, strerror(errno)); return; } /// \brief Copies owner/group and permissions /// /// \todo ACL and EA support /// static void io_copy_attrs(const file_pair *pair) { // Skip chown and chmod on Windows. #ifndef TUKLIB_DOSLIKE // This function is more tricky than you may think at first. // Blindly copying permissions may permit users to access the // destination file who didn't have permission to access the // source file. // Try changing the owner of the file. If we aren't root or the owner // isn't already us, fchown() probably doesn't succeed. We warn // about failing fchown() only if we are root. if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1)) && warn_fchown) message_warning(_("%s: Cannot set the file owner: %s"), pair->dest_name, strerror(errno)); mode_t mode; // With BSD semantics the new dest file may have a group that // does not belong to the user. If the src file has the same gid // nothing has to be done. Nevertheless OpenBSD fchown(2) fails // in this case which seems to be POSIX compliant. As there is // nothing to do, skip the system call. if (pair->dest_st.st_gid != pair->src_st.st_gid && fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) { message_warning(_("%s: Cannot set the file group: %s"), pair->dest_name, strerror(errno)); // We can still safely copy some additional permissions: // `group' must be at least as strict as `other' and // also vice versa. // // NOTE: After this, the owner of the source file may // get additional permissions. This shouldn't be too bad, // because the owner would have had permission to chmod // the original file anyway. mode = ((pair->src_st.st_mode & 0070) >> 3) & (pair->src_st.st_mode & 0007); mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode; } else { // Drop the setuid, setgid, and sticky bits. mode = pair->src_st.st_mode & 0777; } if (fchmod(pair->dest_fd, mode)) message_warning(_("%s: Cannot set the file permissions: %s"), pair->dest_name, strerror(errno)); #endif // Copy the timestamps. We have several possible ways to do this, of // which some are better in both security and precision. // // First, get the nanosecond part of the timestamps. As of writing, // it's not standardized by POSIX, and there are several names for // the same thing in struct stat. long atime_nsec; long mtime_nsec; # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC) // GNU and Solaris atime_nsec = pair->src_st.st_atim.tv_nsec; mtime_nsec = pair->src_st.st_mtim.tv_nsec; # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC) // BSD atime_nsec = pair->src_st.st_atimespec.tv_nsec; mtime_nsec = pair->src_st.st_mtimespec.tv_nsec; # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC) // GNU and BSD without extensions atime_nsec = pair->src_st.st_atimensec; mtime_nsec = pair->src_st.st_mtimensec; # elif defined(HAVE_STRUCT_STAT_ST_UATIME) // Tru64 atime_nsec = pair->src_st.st_uatime * 1000; mtime_nsec = pair->src_st.st_umtime * 1000; # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC) // UnixWare atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec; mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec; # else // Safe fallback atime_nsec = 0; mtime_nsec = 0; # endif // Construct a structure to hold the timestamps and call appropriate // function to set the timestamps. #if defined(HAVE_FUTIMENS) // Use nanosecond precision. struct timespec tv[2]; tv[0].tv_sec = pair->src_st.st_atime; tv[0].tv_nsec = atime_nsec; tv[1].tv_sec = pair->src_st.st_mtime; tv[1].tv_nsec = mtime_nsec; (void)futimens(pair->dest_fd, tv); #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES) // Use microsecond precision. struct timeval tv[2]; tv[0].tv_sec = pair->src_st.st_atime; tv[0].tv_usec = atime_nsec / 1000; tv[1].tv_sec = pair->src_st.st_mtime; tv[1].tv_usec = mtime_nsec / 1000; # if defined(HAVE_FUTIMES) (void)futimes(pair->dest_fd, tv); # elif defined(HAVE_FUTIMESAT) (void)futimesat(pair->dest_fd, NULL, tv); # else // Argh, no function to use a file descriptor to set the timestamp. (void)utimes(pair->dest_name, tv); # endif #elif defined(HAVE__FUTIME) // Use one-second precision with Windows-specific _futime(). // We could use utime() too except that for some reason the // timestamp will get reset at close(). With _futime() it works. // This struct cannot be const as _futime() takes a non-const pointer. struct _utimbuf buf = { .actime = pair->src_st.st_atime, .modtime = pair->src_st.st_mtime, }; // Avoid warnings. (void)atime_nsec; (void)mtime_nsec; (void)_futime(pair->dest_fd, &buf); #elif defined(HAVE_UTIME) // Use one-second precision. utime() doesn't support using file // descriptor either. Some systems have broken utime() prototype // so don't make this const. struct utimbuf buf = { .actime = pair->src_st.st_atime, .modtime = pair->src_st.st_mtime, }; // Avoid warnings. (void)atime_nsec; (void)mtime_nsec; (void)utime(pair->dest_name, &buf); #endif return; } /// Opens the source file. Returns false on success, true on error. static bool io_open_src_real(file_pair *pair) { // There's nothing to open when reading from stdin. if (pair->src_name == stdin_filename) { pair->src_fd = STDIN_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDIN_FILENO, O_BINARY); #else // Try to set stdin to non-blocking mode. It won't work // e.g. on OpenBSD if stdout is e.g. /dev/null. In such // case we proceed as if stdin were non-blocking anyway // (in case of /dev/null it will be in practice). The // same applies to stdout in io_open_dest_real(). stdin_flags = fcntl(STDIN_FILENO, F_GETFL); if (stdin_flags == -1) { message_error(_("Error getting the file status flags " "from standard input: %s"), strerror(errno)); return true; } if ((stdin_flags & O_NONBLOCK) == 0 && fcntl(STDIN_FILENO, F_SETFL, stdin_flags | O_NONBLOCK) != -1) restore_stdin_flags = true; #endif #ifdef HAVE_POSIX_FADVISE // It will fail if stdin is a pipe and that's fine. (void)posix_fadvise(STDIN_FILENO, 0, 0, opt_mode == MODE_LIST ? POSIX_FADV_RANDOM : POSIX_FADV_SEQUENTIAL); #endif return false; } // Symlinks are not followed unless writing to stdout or --force // or --keep was used. const bool follow_symlinks = opt_stdout || opt_force || opt_keep_original; // We accept only regular files if we are writing the output // to disk too. bzip2 allows overriding this with --force but // gzip and xz don't. const bool reg_files_only = !opt_stdout; // Flags for open() int flags = O_RDONLY | O_BINARY | O_NOCTTY; #ifndef TUKLIB_DOSLIKE // Use non-blocking I/O: // - It prevents blocking when opening FIFOs and some other // special files, which is good if we want to accept only // regular files. // - It can help avoiding some race conditions with signal handling. flags |= O_NONBLOCK; #endif #if defined(O_NOFOLLOW) if (!follow_symlinks) flags |= O_NOFOLLOW; #elif !defined(TUKLIB_DOSLIKE) // Some POSIX-like systems lack O_NOFOLLOW (it's not required // by POSIX). Check for symlinks with a separate lstat() on // these systems. if (!follow_symlinks) { struct stat st; if (lstat(pair->src_name, &st)) { message_error(_("%s: %s"), pair->src_name, strerror(errno)); return true; } else if (S_ISLNK(st.st_mode)) { message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); return true; } } #else // Avoid warnings. (void)follow_symlinks; #endif // Try to open the file. Signals have been blocked so EINTR shouldn't // be possible. pair->src_fd = open(pair->src_name, flags); if (pair->src_fd == -1) { // Signals (that have a signal handler) have been blocked. assert(errno != EINTR); #ifdef O_NOFOLLOW // Give an understandable error message if the reason // for failing was that the file was a symbolic link. // // Note that at least Linux, OpenBSD, Solaris, and Darwin // use ELOOP to indicate that O_NOFOLLOW was the reason // that open() failed. Because there may be // directories in the pathname, ELOOP may occur also // because of a symlink loop in the directory part. // So ELOOP doesn't tell us what actually went wrong, // and this stupidity went into POSIX-1.2008 too. // // FreeBSD associates EMLINK with O_NOFOLLOW and // Tru64 uses ENOTSUP. We use these directly here // and skip the lstat() call and the associated race. // I want to hear if there are other kernels that // fail with something else than ELOOP with O_NOFOLLOW. bool was_symlink = false; # if defined(__FreeBSD__) || defined(__DragonFly__) if (errno == EMLINK) was_symlink = true; # elif defined(__digital__) && defined(__unix__) if (errno == ENOTSUP) was_symlink = true; # elif defined(__NetBSD__) if (errno == EFTYPE) was_symlink = true; # else if (errno == ELOOP && !follow_symlinks) { const int saved_errno = errno; struct stat st; if (lstat(pair->src_name, &st) == 0 && S_ISLNK(st.st_mode)) was_symlink = true; errno = saved_errno; } # endif if (was_symlink) message_warning(_("%s: Is a symbolic link, " "skipping"), pair->src_name); else #endif // Something else than O_NOFOLLOW failing // (assuming that the race conditions didn't // confuse us). message_error(_("%s: %s"), pair->src_name, strerror(errno)); return true; } // Stat the source file. We need the result also when we copy // the permissions, and when unlinking. // // NOTE: Use stat() instead of fstat() with DJGPP, because // then we have a better chance to get st_ino value that can // be used in io_open_dest_real() to prevent overwriting the // source file. #ifdef __DJGPP__ if (stat(pair->src_name, &pair->src_st)) goto error_msg; #else if (fstat(pair->src_fd, &pair->src_st)) goto error_msg; #endif if (S_ISDIR(pair->src_st.st_mode)) { message_warning(_("%s: Is a directory, skipping"), pair->src_name); goto error; } if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) { message_warning(_("%s: Not a regular file, skipping"), pair->src_name); goto error; } #ifndef TUKLIB_DOSLIKE if (reg_files_only && !opt_force && !opt_keep_original) { if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) { // gzip rejects setuid and setgid files even // when --force was used. bzip2 doesn't check // for them, but calls fchown() after fchmod(), // and many systems automatically drop setuid // and setgid bits there. // // We accept setuid and setgid files if // --force or --keep was used. We drop these bits // explicitly in io_copy_attr(). message_warning(_("%s: File has setuid or " "setgid bit set, skipping"), pair->src_name); goto error; } if (pair->src_st.st_mode & S_ISVTX) { message_warning(_("%s: File has sticky bit " "set, skipping"), pair->src_name); goto error; } if (pair->src_st.st_nlink > 1) { message_warning(_("%s: Input file has more " "than one hard link, " "skipping"), pair->src_name); goto error; } } // If it is something else than a regular file, wait until // there is input available. This way reading from FIFOs // will work when open() is used with O_NONBLOCK. if (!S_ISREG(pair->src_st.st_mode)) { signals_unblock(); const io_wait_ret ret = io_wait(pair, -1, true); signals_block(); if (ret != IO_WAIT_MORE) goto error; } #endif #ifdef HAVE_POSIX_FADVISE // It will fail with some special files like FIFOs but that is fine. (void)posix_fadvise(pair->src_fd, 0, 0, opt_mode == MODE_LIST ? POSIX_FADV_RANDOM : POSIX_FADV_SEQUENTIAL); #endif return false; error_msg: message_error(_("%s: %s"), pair->src_name, strerror(errno)); error: (void)close(pair->src_fd); return true; } extern file_pair * io_open_src(const char *src_name) { if (src_name[0] == '\0') { message_error(_("Empty filename, skipping")); return NULL; } // Since we have only one file open at a time, we can use // a statically allocated structure. static file_pair pair; // This implicitly also initializes src_st.st_size to zero // which is expected to be <= 0 by default. fstat() isn't // called when reading from standard input but src_st.st_size // is still read. pair = (file_pair){ .src_name = src_name, .dest_name = NULL, .src_fd = -1, .dest_fd = -1, .src_eof = false, .src_has_seen_input = false, .flush_needed = false, .dest_try_sparse = false, .dest_pending_sparse = 0, }; // Block the signals, for which we have a custom signal handler, so // that we don't need to worry about EINTR. signals_block(); const bool error = io_open_src_real(&pair); signals_unblock(); #ifdef ENABLE_SANDBOX if (!error) io_sandbox_enter(pair.src_fd); #endif return error ? NULL : &pair; } /// \brief Closes source file of the file_pair structure /// /// \param pair File whose src_fd should be closed /// \param success If true, the file will be removed from the disk if /// closing succeeds and --keep hasn't been used. static void io_close_src(file_pair *pair, bool success) { #ifndef TUKLIB_DOSLIKE if (restore_stdin_flags) { assert(pair->src_fd == STDIN_FILENO); restore_stdin_flags = false; if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1) message_error(_("Error restoring the status flags " "to standard input: %s"), strerror(errno)); } #endif if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) { // Close the file before possibly unlinking it. On DOS-like // systems this is always required since unlinking will fail // if the file is open. On POSIX systems it usually works // to unlink open files, but in some cases it doesn't and // one gets EBUSY in errno. // // xz 5.2.2 and older unlinked the file before closing it // (except on DOS-like systems). The old code didn't handle // EBUSY and could fail e.g. on some CIFS shares. The // advantage of unlinking before closing is negligible // (avoids a race between close() and stat()/lstat() and // unlink()), so let's keep this simple. (void)close(pair->src_fd); if (success && !opt_keep_original) io_unlink(pair->src_name, &pair->src_st); } return; } static bool io_open_dest_real(file_pair *pair) { if (opt_stdout || pair->src_fd == STDIN_FILENO) { // We don't modify or free() this. pair->dest_name = (char *)"(stdout)"; pair->dest_fd = STDOUT_FILENO; #ifdef TUKLIB_DOSLIKE setmode(STDOUT_FILENO, O_BINARY); #else // Try to set O_NONBLOCK if it isn't already set. // If it fails, we assume that stdout is non-blocking // in practice. See the comments in io_open_src_real() // for similar situation with stdin. // // NOTE: O_APPEND may be unset later in this function // and it relies on stdout_flags being set here. stdout_flags = fcntl(STDOUT_FILENO, F_GETFL); if (stdout_flags == -1) { message_error(_("Error getting the file status flags " "from standard output: %s"), strerror(errno)); return true; } if ((stdout_flags & O_NONBLOCK) == 0 && fcntl(STDOUT_FILENO, F_SETFL, stdout_flags | O_NONBLOCK) != -1) restore_stdout_flags = true; #endif } else { pair->dest_name = suffix_get_dest_name(pair->src_name); if (pair->dest_name == NULL) return true; #ifdef __DJGPP__ struct stat st; if (stat(pair->dest_name, &st) == 0) { // Check that it isn't a special file like "prn". if (st.st_dev == -1) { message_error("%s: Refusing to write to " "a DOS special file", pair->dest_name); free(pair->dest_name); return true; } // Check that we aren't overwriting the source file. if (st.st_dev == pair->src_st.st_dev && st.st_ino == pair->src_st.st_ino) { message_error("%s: Output file is the same " "as the input file", pair->dest_name); free(pair->dest_name); return true; } } #endif // If --force was used, unlink the target file first. if (opt_force && unlink(pair->dest_name) && errno != ENOENT) { message_error(_("%s: Cannot remove: %s"), pair->dest_name, strerror(errno)); free(pair->dest_name); return true; } // Open the file. int flags = O_WRONLY | O_BINARY | O_NOCTTY | O_CREAT | O_EXCL; #ifndef TUKLIB_DOSLIKE flags |= O_NONBLOCK; #endif const mode_t mode = S_IRUSR | S_IWUSR; pair->dest_fd = open(pair->dest_name, flags, mode); if (pair->dest_fd == -1) { message_error(_("%s: %s"), pair->dest_name, strerror(errno)); free(pair->dest_name); return true; } } if (fstat(pair->dest_fd, &pair->dest_st)) { // If fstat() really fails, we have a safe fallback here. #if defined(__VMS) pair->dest_st.st_ino[0] = 0; pair->dest_st.st_ino[1] = 0; pair->dest_st.st_ino[2] = 0; #else pair->dest_st.st_dev = 0; pair->dest_st.st_ino = 0; #endif } #if defined(TUKLIB_DOSLIKE) && !defined(__DJGPP__) // Check that the output file is a regular file. We open with O_EXCL // but that doesn't prevent open()/_open() on Windows from opening // files like "con" or "nul". // // With DJGPP this check is done with stat() even before opening // the output file. That method or a variant of it doesn't work on // Windows because on Windows stat()/_stat64() sets st.st_mode so // that S_ISREG(st.st_mode) will be true even for special files. // With fstat()/_fstat64() it works. else if (pair->dest_fd != STDOUT_FILENO && !S_ISREG(pair->dest_st.st_mode)) { message_error("%s: Destination is not a regular file", pair->dest_name); // dest_fd needs to be reset to -1 to keep io_close() working. (void)close(pair->dest_fd); pair->dest_fd = -1; free(pair->dest_name); return true; } #elif !defined(TUKLIB_DOSLIKE) else if (try_sparse && opt_mode == MODE_DECOMPRESS) { // When writing to standard output, we need to be extra // careful: // - It may be connected to something else than // a regular file. // - We aren't necessarily writing to a new empty file // or to the end of an existing file. // - O_APPEND may be active. // // TODO: I'm keeping this disabled for DOS-like systems // for now. FAT doesn't support sparse files, but NTFS // does, so maybe this should be enabled on Windows after // some testing. if (pair->dest_fd == STDOUT_FILENO) { if (!S_ISREG(pair->dest_st.st_mode)) return false; if (stdout_flags & O_APPEND) { // Creating a sparse file is not possible // when O_APPEND is active (it's used by // shell's >> redirection). As I understand // it, it is safe to temporarily disable // O_APPEND in xz, because if someone // happened to write to the same file at the // same time, results would be bad anyway // (users shouldn't assume that xz uses any // specific block size when writing data). // // The write position may be something else // than the end of the file, so we must fix // it to start writing at the end of the file // to imitate O_APPEND. if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1) return false; // Construct the new file status flags. // If O_NONBLOCK was set earlier in this // function, it must be kept here too. int flags = stdout_flags & ~O_APPEND; if (restore_stdout_flags) flags |= O_NONBLOCK; // If this fcntl() fails, we continue but won't // try to create sparse output. The original // flags will still be restored if needed (to // unset O_NONBLOCK) when the file is finished. if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1) return false; // Disabling O_APPEND succeeded. Mark // that the flags should be restored // in io_close_dest(). (This may have already // been set when enabling O_NONBLOCK.) restore_stdout_flags = true; } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR) != pair->dest_st.st_size) { // Writing won't start exactly at the end // of the file. We cannot use sparse output, // because it would probably corrupt the file. return false; } } pair->dest_try_sparse = true; } #endif return false; } extern bool io_open_dest(file_pair *pair) { signals_block(); const bool ret = io_open_dest_real(pair); signals_unblock(); return ret; } /// \brief Closes destination file of the file_pair structure /// /// \param pair File whose dest_fd should be closed /// \param success If false, the file will be removed from the disk. /// /// \return Zero if closing succeeds. On error, -1 is returned and /// error message printed. static bool io_close_dest(file_pair *pair, bool success) { #ifndef TUKLIB_DOSLIKE // If io_open_dest() has disabled O_APPEND, restore it here. if (restore_stdout_flags) { assert(pair->dest_fd == STDOUT_FILENO); restore_stdout_flags = false; if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) { message_error(_("Error restoring the O_APPEND flag " "to standard output: %s"), strerror(errno)); return true; } } #endif if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO) return false; if (close(pair->dest_fd)) { message_error(_("%s: Closing the file failed: %s"), pair->dest_name, strerror(errno)); // Closing destination file failed, so we cannot trust its // contents. Get rid of junk: io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return true; } // If the operation using this file wasn't successful, we git rid // of the junk file. if (!success) io_unlink(pair->dest_name, &pair->dest_st); free(pair->dest_name); return false; } extern void io_close(file_pair *pair, bool success) { // Take care of sparseness at the end of the output file. if (success && pair->dest_try_sparse && pair->dest_pending_sparse > 0) { // Seek forward one byte less than the size of the pending // hole, then write one zero-byte. This way the file grows // to its correct size. An alternative would be to use // ftruncate() but that isn't portable enough (e.g. it // doesn't work with FAT on Linux; FAT isn't that important // since it doesn't support sparse files anyway, but we don't // want to create corrupt files on it). if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1, SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when trying " "to create a sparse file: %s"), pair->dest_name, strerror(errno)); success = false; } else { const uint8_t zero[1] = { '\0' }; if (io_write_buf(pair, zero, 1)) success = false; } } signals_block(); // Copy the file attributes. We need to skip this if destination // file isn't open or it is standard output. if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO) io_copy_attrs(pair); // Close the destination first. If it fails, we must not remove // the source file! if (io_close_dest(pair, success)) success = false; // Close the source file, and unlink it if the operation using this // file pair was successful and we haven't requested to keep the // source file. io_close_src(pair, success); signals_unblock(); return; } extern void io_fix_src_pos(file_pair *pair, size_t rewind_size) { assert(rewind_size <= IO_BUFFER_SIZE); if (rewind_size > 0) { // This doesn't need to work on unseekable file descriptors, // so just ignore possible errors. (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR); } return; } extern size_t io_read(file_pair *pair, io_buf *buf, size_t size) { assert(size <= IO_BUFFER_SIZE); size_t pos = 0; while (pos < size) { const ssize_t amount = read( pair->src_fd, buf->u8 + pos, size - pos); if (amount == 0) { pair->src_eof = true; break; } if (amount == -1) { if (errno == EINTR) { if (user_abort) return SIZE_MAX; continue; } #ifndef TUKLIB_DOSLIKE if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { // Disable the flush-timeout if no input has // been seen since the previous flush and thus // there would be nothing to flush after the // timeout expires (avoids busy waiting). const int timeout = pair->src_has_seen_input ? mytime_get_flush_timeout() : -1; switch (io_wait(pair, timeout, true)) { case IO_WAIT_MORE: continue; case IO_WAIT_ERROR: return SIZE_MAX; case IO_WAIT_TIMEOUT: pair->flush_needed = true; return pos; default: message_bug(); } } #endif message_error(_("%s: Read error: %s"), pair->src_name, strerror(errno)); return SIZE_MAX; } pos += (size_t)(amount); if (!pair->src_has_seen_input) { pair->src_has_seen_input = true; mytime_set_flush_time(); } } return pos; } extern bool io_seek_src(file_pair *pair, uint64_t pos) { // Caller must not attempt to seek past the end of the input file // (seeking to 100 in a 100-byte file is seeking to the end of // the file, not past the end of the file, and thus that is allowed). // // This also validates that pos can be safely cast to off_t. if (pos > (uint64_t)(pair->src_st.st_size)) message_bug(); if (lseek(pair->src_fd, (off_t)(pos), SEEK_SET) == -1) { message_error(_("%s: Error seeking the file: %s"), pair->src_name, strerror(errno)); return true; } pair->src_eof = false; return false; } extern bool io_pread(file_pair *pair, io_buf *buf, size_t size, uint64_t pos) { // Using lseek() and read() is more portable than pread() and // for us it is as good as real pread(). if (io_seek_src(pair, pos)) return true; const size_t amount = io_read(pair, buf, size); if (amount == SIZE_MAX) return true; if (amount != size) { message_error(_("%s: Unexpected end of file"), pair->src_name); return true; } return false; } static bool is_sparse(const io_buf *buf) { assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0); for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i) if (buf->u64[i] != 0) return false; return true; } static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size) { assert(size <= IO_BUFFER_SIZE); while (size > 0) { const ssize_t amount = write(pair->dest_fd, buf, size); if (amount == -1) { if (errno == EINTR) { if (user_abort) return true; continue; } #ifndef TUKLIB_DOSLIKE if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) { if (io_wait(pair, -1, false) == IO_WAIT_MORE) continue; return true; } #endif // Handle broken pipe specially. gzip and bzip2 // don't print anything on SIGPIPE. In addition, // gzip --quiet uses exit status 2 (warning) on // broken pipe instead of whatever raise(SIGPIPE) // would make it return. It is there to hide "Broken // pipe" message on some old shells (probably old // GNU bash). // // We don't do anything special with --quiet, which // is what bzip2 does too. If we get SIGPIPE, we // will handle it like other signals by setting // user_abort, and get EPIPE here. if (errno != EPIPE) message_error(_("%s: Write error: %s"), pair->dest_name, strerror(errno)); return true; } buf += (size_t)(amount); size -= (size_t)(amount); } return false; } extern bool io_write(file_pair *pair, const io_buf *buf, size_t size) { assert(size <= IO_BUFFER_SIZE); if (pair->dest_try_sparse) { // Check if the block is sparse (contains only zeros). If it // sparse, we just store the amount and return. We will take // care of actually skipping over the hole when we hit the // next data block or close the file. // // Since io_close() requires that dest_pending_sparse > 0 // if the file ends with sparse block, we must also return // if size == 0 to avoid doing the lseek(). if (size == IO_BUFFER_SIZE) { // Even if the block was sparse, treat it as non-sparse // if the pending sparse amount is large compared to // the size of off_t. In practice this only matters // on 32-bit systems where off_t isn't always 64 bits. const off_t pending_max = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2); if (is_sparse(buf) && pair->dest_pending_sparse < pending_max) { pair->dest_pending_sparse += (off_t)(size); return false; } } else if (size == 0) { return false; } // This is not a sparse block. If we have a pending hole, // skip it now. if (pair->dest_pending_sparse > 0) { if (lseek(pair->dest_fd, pair->dest_pending_sparse, SEEK_CUR) == -1) { message_error(_("%s: Seeking failed when " "trying to create a sparse " "file: %s"), pair->dest_name, strerror(errno)); return true; } pair->dest_pending_sparse = 0; } } return io_write_buf(pair, buf->u8, size); }