diff --git a/src/xargs/mod.rs b/src/xargs/mod.rs
index 19c2de67..47b1a3b3 100644
--- a/src/xargs/mod.rs
+++ b/src/xargs/mod.rs
@@ -33,6 +33,7 @@ mod options {
     pub const NULL: &str = "null";
     pub const REPLACE: &str = "replace";
     pub const REPLACE_I: &str = "replace-I";
+    pub const SHOW_LIMITS: &str = "show-limits";
     pub const VERBOSE: &str = "verbose";
 }
 
@@ -46,6 +47,7 @@ struct Options {
     no_run_if_empty: bool,
     null: bool,
     replace: Option<String>,
+    show_limits: bool,
     verbose: bool,
     eof_delimiter: Option<String>,
 }
@@ -175,20 +177,91 @@ impl MaxCharsCommandSizeLimiter {
 
     #[cfg(unix)]
     fn new_system(env: &HashMap<OsString, OsString>) -> Self {
-        // POSIX requires that we leave 2048 bytes of space so that the child processes
-        // can have room to set their own environment variables.
-        const ARG_HEADROOM: usize = 2048;
-        let arg_max = unsafe { uucore::libc::sysconf(uucore::libc::_SC_ARG_MAX) } as usize;
+        Self::new(system_command_size_limit(env))
+    }
+}
 
-        let env_size: usize = env
-            .iter()
-            .map(|(var, value)| count_osstr_chars_for_exec(var) + count_osstr_chars_for_exec(value))
-            .sum();
+/// Smallest `ARG_MAX` POSIX lets any system report; used when `sysconf` gives no limit.
+const POSIX_MIN_ARG_MAX: usize = 4096;
 
-        Self::new(arg_max - ARG_HEADROOM - env_size)
+/// Bytes held back from `ARG_MAX` so child processes can extend their environment.
+#[cfg(unix)]
+const ARG_HEADROOM: usize = 2048;
+
+/// Returns the `sysconf(_SC_ARG_MAX)` limit, or [`POSIX_MIN_ARG_MAX`] when the call
+/// reports an error or an indeterminate limit (a non-positive result).
+#[cfg(unix)]
+fn system_arg_max() -> usize {
+    // SAFETY: `sysconf` is passed only an integer name constant and no pointers,
+    // so the call has no memory-safety preconditions to uphold.
+    let arg_max = unsafe { uucore::libc::sysconf(uucore::libc::_SC_ARG_MAX) };
+    if arg_max > 0 {
+        arg_max as usize
+    } else {
+        POSIX_MIN_ARG_MAX
     }
 }
 
+/// Returns the fixed command-line length limit used on Windows.
+#[cfg(windows)]
+fn system_arg_max() -> usize {
+    // Taken from the CreateProcess docs.
+    32767
+}
+
+/// Sums `count_osstr_chars_for_exec` over every variable name and value in `env`.
+fn environment_size(env: &HashMap<OsString, OsString>) -> usize {
+    env.iter()
+        .map(|(var, value)| count_osstr_chars_for_exec(var) + count_osstr_chars_for_exec(value))
+        .sum()
+}
+
+/// Returns the command size usable on this system: `ARG_MAX` minus the POSIX
+/// headroom and the current environment size, saturating at zero.
+#[cfg(unix)]
+fn system_command_size_limit(env: &HashMap<OsString, OsString>) -> usize {
+    // POSIX requires that we leave 2048 bytes of space so that the child
+    // processes can have room to set their own environment variables.
+    system_arg_max()
+        .saturating_sub(ARG_HEADROOM)
+        .saturating_sub(environment_size(env))
+}
+
+/// Returns the command size usable on Windows; the environment is not subtracted.
+#[cfg(windows)]
+fn system_command_size_limit(_env: &HashMap<OsString, OsString>) -> usize {
+    system_arg_max()
+}
+
+/// Prints the command-line size limits to stderr for `--show-limits`.
+///
+/// `max_chars` is the caller's configured character limit, if any; the reported
+/// buffer size is that value capped at the system limit.
+fn show_limits(env: &HashMap<OsString, OsString>, max_chars: Option<usize>) {
+    // Match the GNU xargs diagnostics that downstream scripts parse:
+    // https://git.savannah.gnu.org/cgit/findutils.git/tree/xargs/xargs.c?h=v4.10.0#n795
+    let system_limit = system_command_size_limit(env);
+    let buffer_size = max_chars.unwrap_or(system_limit).min(system_limit);
+
+    eprintln!(
+        "Your environment variables take up {} bytes",
+        environment_size(env)
+    );
+    eprintln!(
+        "POSIX upper limit on argument length (this system): {}",
+        system_arg_max()
+    );
+    eprintln!(
+        "POSIX smallest allowable upper limit on argument length (all systems): {POSIX_MIN_ARG_MAX}"
+    );
+    eprintln!("Maximum length of command we could actually use: {system_limit}");
+    eprintln!("Size of command buffer we are actually using: {buffer_size}");
+    eprintln!(
+        "Maximum parallelism (--max-procs must be no greater): {}",
+        i32::MAX
+    );
+}
+
 impl CommandSizeLimiter for MaxCharsCommandSizeLimiter {
     fn try_arg(
         &mut self,
@@ -884,6 +957,7 @@ fn normalize_options(options: Options, matches: &clap::ArgMatches) -> Options {
         no_run_if_empty: options.no_run_if_empty,
         null: options.null,
         replace,
+        show_limits: options.show_limits,
         verbose: options.verbose,
         eof_delimiter,
     }
@@ -985,6 +1059,12 @@ fn do_xargs(args: &[&str]) -> Result {
                 )
                 .value_parser(validate_positive_usize),
         )
+        .arg(
+            Arg::new(options::SHOW_LIMITS)
+                .long(options::SHOW_LIMITS)
+                .help("Display the command-line length limits and exit")
+                .action(ArgAction::SetTrue),
+        )
         .arg(
             Arg::new(options::VERBOSE)
                 .short('t')
@@ -1082,6 +1162,7 @@ fn do_xargs(args: &[&str]) -> Result {
                     .map_or_else(|| "{}".to_string(), std::borrow::ToOwned::to_owned)
             })
         }),
+        show_limits: matches.get_flag(options::SHOW_LIMITS),
         verbose: matches.get_flag(options::VERBOSE),
         eof_delimiter: [options::EOF_E, options::EOF].iter().find_map(|&option| {
             matches.contains_id(option).then(|| {
@@ -1102,6 +1183,11 @@ fn do_xargs(args: &[&str]) -> Result {
     };
     let env = std::env::vars_os().collect();
 
+    if options.show_limits {
+        show_limits(&env, options.max_chars);
+        return Ok(CommandResult::Success);
+    }
+
     let mut limiters = LimiterCollection::new();
     if let Some(max_args) = options.max_args {
         limiters.add(MaxArgsCommandSizeLimiter::new(max_args));
diff --git a/tests/test_xargs.rs b/tests/test_xargs.rs
index 3faa0421..038a676f 100644
--- a/tests/test_xargs.rs
+++ b/tests/test_xargs.rs
@@ -169,6 +169,26 @@ fn xargs_max_chars() {
         .no_stdout();
 }
 
+#[test]
+fn xargs_show_limits() {
+    // Debian's python3.13 maintainer scripts parse this GNU xargs diagnostic
+    // to choose a safe --max-chars value:
+    // https://sources.debian.org/src/python3.13/3.13.5-2%2Bdeb13u2/debian/libPVER-minimal.prerm.in/#L8
+    // GNU findutils emits the compatibility target here:
+    // https://git.savannah.gnu.org/cgit/findutils.git/tree/xargs/xargs.c?h=v4.10.0#n795
+    let output = ucmd().args(&["--show-limits"]).pipe_in("").succeeds();
+    let result = output.no_stdout();
+
+    let max = result
+        .stderr_str()
+        .lines()
+        .find_map(|line| line.strip_prefix("Maximum length of command we could actually use: "))
+        .expect("expected GNU-compatible maximum length diagnostic")
+        .parse::<usize>()
+        .expect("expected maximum length diagnostic to end in an integer");
+    assert!(max > 0, "expected a positive maximum length, got {max}");
+}
+
 #[test]
 fn xargs_exit_on_large() {
     ucmd()