aboutsummaryrefslogblamecommitdiff
path: root/src/xz/args.c
blob: d28a3d40ca149147830eea1d2aec3d5cb9fdd42b (plain) (tree)
1
2
3
4
5
6
7
8
9






                                                                               
                            
  

                                                  








                                                                               


                               
                       


                                                                   
                                             
 
 

























                                                                              
           
                                                  
 
              
                                  








                             
                              

                           

                                   
                              

                                

          

                                                                


                                                  




                                                                  

                                      



                                                                  
                                                                            
                                                                  
                                                                               

                                                                         

                                             

                                                                  



                                                                                       
                                                                            
                                                                  
 


                                                                  

                          








                                                                           

                                







                                                                              
 
                                                                
          
 




                                                                         

                                                                              
                                                                 
                                                  

                              





                                                                     
 










                                                                       



                              
                                           


                              
                                                               
                                                                           




                                               
                                          










                                                   




                                            




                                         


                                               
                                                 
 


                                               





                                               










                                                 

                          
                                                     

                              



                                           





                                             
                                                     

                              


                                         



                                                                           

                              





                                                 
                             

                                                             


                                 

                                                             


                              

                                                             


                              

                                                             


                                  

                                                              


                               

                                                             


                               

                                                               

                              
                               

                                                              

                              
                               

                                                              

                              



                           













                                                                              

                          
                                     


                                                                           

                                                                       
 
                                                     





                                             

                                                 




                                                                



                                                                   
                                                             

                                                                          
                                                                               

                         





                                                                              
                                                        


                              



                                       
                               
                                                 



                                

                                                                       
                                                                           
                                                                   

                                             

                                                                          
                                

                                                                
                                                                             

                                                                       
                                                                         



                              



                                                
                        
                                           
                                                             







                 
                                                                    
 
                                    


                        
                                                      

                           


                                                                      

                                                 







                                                                         


                                               


                                                                          

                                                                            
                                                                           

                                                                       


                 


                                                                              


                          

                                                                        


                                                 
                                                     
                                              
                                      





                                               


                                                                              
 













                                                                        





                  

                                                  
 



                                                              
 

                                    
                                                        








                                                                          








                                                                         
                                                 

                                                   

                                                            
                                                   

                                                          


                 


                                                        

                                     
                                     








                                                                            

                                                                
                                                                   
                                       
 




                                                                             
                                                                  
                                                 

                                                










                                                                         

         
               
 
///////////////////////////////////////////////////////////////////////////////
//
/// \file       args.c
/// \brief      Argument parsing
///
/// \note       Filter-specific options parsing is in options.c.
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "private.h"

#include "getopt.h"
#include <ctype.h>


/// True if output goes to standard output. Set by -c/--stdout, by being
/// invoked under a name containing "xzcat" or "lzcat", and forced to true
/// by args_parse() in test mode.
bool opt_stdout = false;

/// Set by -f/--force.
bool opt_force = false;

/// Set by -k/--keep. args_parse() also forces this to true whenever
/// opt_stdout ends up true.
bool opt_keep_original = false;

/// Set by --robot.
bool opt_robot = false;

// We don't modify or free() this, but we need to assign it in some
// non-const pointers.
const char *const stdin_filename = "(stdin)";


/// \brief      Parse a memory usage limit and apply it
///
/// A value ending in '%' is parsed as a percentage in the range [1, 100];
/// anything else is parsed as a plain value in the range [0, UINT64_MAX].
/// The result is handed to hardware_memlimit_set().
///
/// \param      name              Option name passed to str_to_uint64()
///                               when parsing a plain value
/// \param      name_percentage   Option name passed to str_to_uint64()
///                               when parsing a percentage
/// \param      str               String to parse. A trailing '%' is
///                               overwritten with '\0' in place.
/// \param      set_compress      Forwarded to hardware_memlimit_set()
/// \param      set_decompress    Forwarded to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	const size_t length = strlen(str);
	const bool is_percentage = length > 0 && str[length - 1] == '%';

	uint64_t limit;
	if (!is_percentage) {
		// On 32-bit systems SIZE_MAX would be the natural upper
		// bound, but UINT64_MAX is accepted so that scripts which
		// pass values above 4 GiB keep working.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	} else {
		// Drop the '%' so that only the number is left to parse.
		str[length - 1] = '\0';
		limit = str_to_uint64(name_percentage, str, 1, 100);
	}

	hardware_memlimit_set(
			limit, set_compress, set_decompress, is_percentage);
}


/// \brief      Parse one argument vector with getopt_long()
///
/// Every recognized option is applied immediately, either to the global
/// option variables, via the coder/hardware/message helper functions, or
/// to *args (only --files and --files0 touch *args).
///
/// Non-option arguments are not handled here. After this returns, the
/// global optind is what the caller uses to locate them.
///
/// \param      args    Receives files_name, files_file, and files_delim
///                     when --files or --files0 is seen
/// \param      argc    Number of elements in argv
/// \param      argv    Argument vector; argv[0] is the program name
static void
parse_real(args_info *args, int argc, char **argv)
{
	// Return values for long options that have no short equivalent.
	// The values start at INT_MIN, so they differ from the character
	// codes of the short options and from the -1 that ends the loop.
	enum {
		OPT_X86 = INT_MIN,
		OPT_POWERPC,
		OPT_IA64,
		OPT_ARM,
		OPT_ARMTHUMB,
		OPT_SPARC,
		OPT_DELTA,
		OPT_LZMA1,
		OPT_LZMA2,

		OPT_NO_SPARSE,
		OPT_FILES,
		OPT_FILES0,
		OPT_MEM_COMPRESS,
		OPT_MEM_DECOMPRESS,
		OPT_NO_ADJUST,
		OPT_INFO_MEMORY,
		OPT_ROBOT,
	};

	// NOTE: 'r' is listed here but there is no "case 'r'" in the switch
	// below, so -r ends up in the default branch.
	static const char short_opts[]
			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";

	static const struct option long_opts[] = {
		// Operation mode
		{ "compress",     no_argument,       NULL,  'z' },
		{ "decompress",   no_argument,       NULL,  'd' },
		{ "uncompress",   no_argument,       NULL,  'd' },
		{ "test",         no_argument,       NULL,  't' },
		{ "list",         no_argument,       NULL,  'l' },

		// Operation modifiers
		{ "keep",         no_argument,       NULL,  'k' },
		{ "force",        no_argument,       NULL,  'f' },
		{ "stdout",       no_argument,       NULL,  'c' },
		{ "to-stdout",    no_argument,       NULL,  'c' },
		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
		{ "suffix",       required_argument, NULL,  'S' },
		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
		{ "files",        optional_argument, NULL,  OPT_FILES },
		{ "files0",       optional_argument, NULL,  OPT_FILES0 },

		// Basic compression settings
		{ "format",       required_argument, NULL,  'F' },
		{ "check",        required_argument, NULL,  'C' },
		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
		{ "memlimit",     required_argument, NULL,  'M' },
		{ "memory",       required_argument, NULL,  'M' }, // Old alias
		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
		{ "threads",      required_argument, NULL,  'T' },

		{ "extreme",      no_argument,       NULL,  'e' },
		{ "fast",         no_argument,       NULL,  '0' },
		{ "best",         no_argument,       NULL,  '9' },

		// Filters
		{ "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
		{ "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
		{ "x86",          optional_argument, NULL,  OPT_X86 },
		{ "powerpc",      optional_argument, NULL,  OPT_POWERPC },
		{ "ia64",         optional_argument, NULL,  OPT_IA64 },
		{ "arm",          optional_argument, NULL,  OPT_ARM },
		{ "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
		{ "sparc",        optional_argument, NULL,  OPT_SPARC },
		{ "delta",        optional_argument, NULL,  OPT_DELTA },

		// Other options
		{ "quiet",        no_argument,       NULL,  'q' },
		{ "verbose",      no_argument,       NULL,  'v' },
		{ "no-warn",      no_argument,       NULL,  'Q' },
		{ "robot",        no_argument,       NULL,  OPT_ROBOT },
		{ "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
		{ "help",         no_argument,       NULL,  'h' },
		{ "long-help",    no_argument,       NULL,  'H' },
		{ "version",      no_argument,       NULL,  'V' },

		{ NULL,           0,                 NULL,   0 }
	};

	int c;

	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
			!= -1) {
		switch (c) {
		// Compression preset (also for decompression if --format=raw)
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			coder_set_preset(c - '0');
			break;

		// --memlimit-compress
		case OPT_MEM_COMPRESS:
			parse_memlimit("memlimit-compress",
					"memlimit-compress%", optarg,
					true, false);
			break;

		// --memlimit-decompress
		case OPT_MEM_DECOMPRESS:
			parse_memlimit("memlimit-decompress",
					"memlimit-decompress%", optarg,
					false, true);
			break;

		// --memlimit
		case 'M':
			parse_memlimit("memlimit", "memlimit%", optarg,
					true, true);
			break;

		// --suffix
		case 'S':
			suffix_set(optarg);
			break;

		// --threads
		case 'T':
			hardware_threadlimit_set(str_to_uint64(
					"threads", optarg, 0, UINT32_MAX));
			break;

		// --version
		case 'V':
			// This doesn't return.
			message_version();

		// --stdout
		case 'c':
			opt_stdout = true;
			break;

		// --decompress
		case 'd':
			opt_mode = MODE_DECOMPRESS;
			break;

		// --extreme
		case 'e':
			coder_set_extreme();
			break;

		// --force
		case 'f':
			opt_force = true;
			break;

		// --info-memory
		case OPT_INFO_MEMORY:
			// This doesn't return.
			hardware_memlimit_show();

		// --help
		case 'h':
			// This doesn't return.
			message_help(false);

		// --long-help
		case 'H':
			// This doesn't return.
			message_help(true);

		// --list
		case 'l':
			opt_mode = MODE_LIST;
			break;

		// --keep
		case 'k':
			opt_keep_original = true;
			break;

		// --quiet
		case 'q':
			message_verbosity_decrease();
			break;

		// --no-warn
		case 'Q':
			set_exit_no_warn();
			break;

		// --test
		case 't':
			opt_mode = MODE_TEST;
			break;

		// --verbose
		case 'v':
			message_verbosity_increase();
			break;

		// --robot
		case OPT_ROBOT:
			opt_robot = true;

			// This is to make sure that floating point numbers
			// always have a dot as decimal separator.
			setlocale(LC_NUMERIC, "C");
			break;

		// --compress
		case 'z':
			opt_mode = MODE_COMPRESS;
			break;

		// Filter setup
		//
		// Each filter option takes an optional argument, so optarg
		// may be NULL when it is passed to the options_*() parser.

		case OPT_X86:
			coder_add_filter(LZMA_FILTER_X86,
					options_bcj(optarg));
			break;

		case OPT_POWERPC:
			coder_add_filter(LZMA_FILTER_POWERPC,
					options_bcj(optarg));
			break;

		case OPT_IA64:
			coder_add_filter(LZMA_FILTER_IA64,
					options_bcj(optarg));
			break;

		case OPT_ARM:
			coder_add_filter(LZMA_FILTER_ARM,
					options_bcj(optarg));
			break;

		case OPT_ARMTHUMB:
			coder_add_filter(LZMA_FILTER_ARMTHUMB,
					options_bcj(optarg));
			break;

		case OPT_SPARC:
			coder_add_filter(LZMA_FILTER_SPARC,
					options_bcj(optarg));
			break;

		case OPT_DELTA:
			coder_add_filter(LZMA_FILTER_DELTA,
					options_delta(optarg));
			break;

		case OPT_LZMA1:
			coder_add_filter(LZMA_FILTER_LZMA1,
					options_lzma(optarg));
			break;

		case OPT_LZMA2:
			coder_add_filter(LZMA_FILTER_LZMA2,
					options_lzma(optarg));
			break;

		// Other

		// --format
		case 'F': {
			// Just in case, support both "lzma" and "alone" since
			// the latter was used for forward compatibility in
			// LZMA Utils 4.32.x.
			static const struct {
				char str[8];
				enum format_type format;
			} types[] = {
				{ "auto",   FORMAT_AUTO },
				{ "xz",     FORMAT_XZ },
				{ "lzma",   FORMAT_LZMA },
				{ "alone",  FORMAT_LZMA },
				// { "gzip",   FORMAT_GZIP },
				// { "gz",     FORMAT_GZIP },
				{ "raw",    FORMAT_RAW },
			};

			// Linear search for an exact name match. Reaching
			// the end of the table is a fatal error, so the loop
			// relies on message_fatal() not returning.
			size_t i = 0;
			while (strcmp(types[i].str, optarg) != 0)
				if (++i == ARRAY_SIZE(types))
					message_fatal(_("%s: Unknown file "
							"format type"),
							optarg);

			opt_format = types[i].format;
			break;
		}

		// --check
		case 'C': {
			static const struct {
				char str[8];
				lzma_check check;
			} types[] = {
				{ "none",   LZMA_CHECK_NONE },
				{ "crc32",  LZMA_CHECK_CRC32 },
				{ "crc64",  LZMA_CHECK_CRC64 },
				{ "sha256", LZMA_CHECK_SHA256 },
			};

			// Same search as in --format: an unknown name is
			// fatal and message_fatal() must not return.
			size_t i = 0;
			while (strcmp(types[i].str, optarg) != 0) {
				if (++i == ARRAY_SIZE(types))
					message_fatal(_("%s: Unsupported "
							"integrity "
							"check type"), optarg);
			}

			// Use a separate check in case we are using different
			// liblzma than what was used to compile us.
			if (!lzma_check_is_supported(types[i].check))
				message_fatal(_("%s: Unsupported integrity "
						"check type"), optarg);

			coder_set_check(types[i].check);
			break;
		}

		// --no-sparse
		case OPT_NO_SPARSE:
			io_no_sparse();
			break;

		// --files: like --files0 but the names are separated
		// by newlines.
		case OPT_FILES:
			args->files_delim = '\n';

		// Fall through

		// --files0: files_delim is not assigned here, so it keeps
		// the value it already had.
		case OPT_FILES0:
			if (args->files_name != NULL)
				message_fatal(_("Only one file can be "
						"specified with `--files' "
						"or `--files0'."));

			if (optarg == NULL) {
				// No argument: read the list from stdin.
				args->files_name = (char *)stdin_filename;
				args->files_file = stdin;
			} else {
				// NOTE: optarg is stored as is, not copied,
				// so files_name is valid only as long as
				// the argv strings are.
				args->files_name = optarg;
				args->files_file = fopen(optarg,
						c == OPT_FILES ? "r" : "rb");
				if (args->files_file == NULL)
					message_fatal("%s: %s", optarg,
							strerror(errno));
			}

			break;

		// --no-adjust
		case OPT_NO_ADJUST:
			opt_auto_adjust = false;
			break;

		// Anything else that getopt_long() returned, including
		// the unhandled 'r'.
		default:
			message_try_help();
			tuklib_exit(E_ERROR, E_ERROR, false);
		}
	}

	return;
}


/// \brief      Parse options from an environment variable
///
/// The value of the variable is copied, split into arguments at
/// whitespace, and passed to parse_real(). There is no quoting or
/// escaping: every run of whitespace separates two arguments. If the
/// variable is not set, nothing is done.
///
/// The state of getopt_long() is reset afterwards so that another
/// argument vector can be parsed.
///
/// \param      args      Passed to parse_real(); may receive the
///                       --files/--files0 settings
/// \param      argv0     Program name to use as argv[0] of the
///                       temporary argument vector
/// \param      varname   Name of the environment variable to read
static void
parse_environment(args_info *args, char *argv0, const char *varname)
{
	char *env = getenv(varname);
	if (env == NULL)
		return;

	// We modify the string, so make a copy of it.
	env = xstrdup(env);

	// Calculate the number of arguments in env. argc starts at one
	// to include space for the program name.
	int argc = 1;
	bool prev_was_space = true;
	for (size_t i = 0; env[i] != '\0'; ++i) {
		// NOTE: Cast to unsigned char is needed so that correct
		// value gets passed to isspace(), which expects
		// unsigned char cast to int. Casting to int is done
		// automatically due to integer promotion, but we need to
		// force char to unsigned char manually. Otherwise 8-bit
		// characters would get promoted to wrong value if
		// char is signed.
		if (isspace((unsigned char)env[i])) {
			prev_was_space = true;
		} else if (prev_was_space) {
			prev_was_space = false;

			// Keep argc small enough to fit into a signed int
			// and to keep it usable for memory allocation.
			if (++argc == my_min(
					INT_MAX, SIZE_MAX / sizeof(char *)))
				message_fatal(_("The environment variable "
						"%s contains too many "
						"arguments"), varname);
		}
	}

	// Allocate memory to hold pointers to the arguments. Add one to get
	// space for the terminating NULL (if some systems happen to need it).
	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
	argv[0] = argv0;
	argv[argc] = NULL;

	// Go through the string again. Split the arguments using '\0'
	// characters and add pointers to the resulting strings to argv.
	argc = 1;
	prev_was_space = true;
	for (size_t i = 0; env[i] != '\0'; ++i) {
		if (isspace((unsigned char)env[i])) {
			prev_was_space = true;
			env[i] = '\0';
		} else if (prev_was_space) {
			prev_was_space = false;
			argv[argc++] = env + i;
		}
	}

	// Remember the current files_name so that we can tell afterwards
	// whether parse_real() replaced it.
	const char *const old_files_name = args->files_name;

	// Parse the argument list we got from the environment. All non-option
	// arguments i.e. filenames are ignored.
	parse_real(args, argc, argv);

	// Reset the state of the getopt_long() so that we can parse the
	// command line options too. There are two incompatible ways to
	// do it.
#ifdef HAVE_OPTRESET
	// BSD
	optind = 1;
	optreset = 1;
#else
	// GNU, Solaris
	optind = 0;
#endif

	// We don't need the argument list from environment anymore.
	free(argv);

	// parse_real() stores optarg itself (not a copy) in args->files_name
	// when --files=NAME or --files0=NAME is used. Here optarg points
	// into env, so freeing env would leave args->files_name dangling.
	// In that case env is deliberately kept allocated for the rest of
	// the program's life; otherwise it is released.
	const bool env_in_use = args->files_name != old_files_name
			&& args->files_name != stdin_filename;
	if (!env_in_use)
		free(env);

	return;
}


/// \brief      Parse the environment and the command line
///
/// Defaults are first derived from the name the program was invoked
/// with. Options are then read from the environment variables
/// XZ_DEFAULTS and XZ_OPT, and finally from the command line, in
/// that order.
///
/// \param      args    Filled in with the --files/--files0 state and
///                     with the list of file names to process
/// \param      argc    Number of elements in argv
/// \param      argv    Command line arguments; argv[0] is the name
///                     the program was invoked with
extern void
args_parse(args_info *args, int argc, char **argv)
{
	// Start with no --files/--files0 list.
	args->files_name = NULL;
	args->files_file = NULL;
	args->files_delim = '\0';

	// Choose defaults based on how we were called.
	{
		// Skip the directory part of argv[0], if there is one.
		// The result may be an empty string if argv[0] is weird,
		// which simply matches nothing below.
		const char *slash = strrchr(argv[0], '/');
		const char *cmd = slash != NULL ? slash + 1 : argv[0];

		// Full command names are searched for instead of short
		// substrings like "un", "cat", and "lz" to reduce false
		// positives with renamed programs. The order matters:
		// the first entry found in cmd wins, so "unlzma" has to
		// come before "lzma".
		static const struct {
			const char *substr;
			bool lzma_format;
			bool decompress;
			bool to_stdout;
		} commands[] = {
			{ "xzcat",  false, true,  true  },
			{ "unxz",   false, true,  false },
			{ "lzcat",  true,  true,  true  },
			{ "unlzma", true,  true,  false },
			{ "lzma",   true,  false, false },
		};

		for (size_t i = 0; i < ARRAY_SIZE(commands); ++i) {
			if (strstr(cmd, commands[i].substr) == NULL)
				continue;

			if (commands[i].lzma_format)
				opt_format = FORMAT_LZMA;

			if (commands[i].decompress)
				opt_mode = MODE_DECOMPRESS;

			if (commands[i].to_stdout)
				opt_stdout = true;

			break;
		}
	}

	// Options from the environment come first so that the command
	// line, which is parsed last, can override them.
	parse_environment(args, argv[0], "XZ_DEFAULTS");
	parse_environment(args, argv[0], "XZ_OPT");
	parse_real(args, argc, argv);

	// In test mode the data is written nowhere, but pretending that
	// it goes to stdout makes the rest of the code behave well.
	if (opt_mode == MODE_TEST)
		opt_stdout = true;

	// Never remove the source file when the destination is not on disk.
	if (opt_stdout)
		opt_keep_original = true;

	// Compressing without --format, or with --format=auto, means
	// the .xz format.
	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
		opt_format = FORMAT_XZ;

	// The compression settings (the options and their memory usage)
	// are validated when compressing to any format. Raw decoding needs
	// this too, because the options given on the command line are what
	// tell us what kind of raw data we are supposed to decode.
	if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
		coder_set_compression_settings();

	const bool have_names
			= argv[optind] != NULL || args->files_name != NULL;

	if (have_names) {
		// At least one filename was given on the command line,
		// or --files or --files0 was specified.
		args->arg_names = argv + optind;
		args->arg_count = argc - optind;
	} else {
		// No filenames at all, so read from stdin. The "-" constant
		// is never modified or freed, but the caller modifies the
		// array, so the array itself must not be const.
		static char *names_stdin[2] = { (char *)"-", NULL };
		args->arg_names = names_stdin;
		args->arg_count = 1;
	}
}