diff --git a/crates/bashkit/src/testing.rs b/crates/bashkit/src/testing.rs index 97bbba50..7ebf9e93 100644 --- a/crates/bashkit/src/testing.rs +++ b/crates/bashkit/src/testing.rs @@ -144,9 +144,9 @@ fn strip_real_shell_error_lines(stderr: &str) -> String { lines.join("\n") } -/// Recognize stderr lines that bash or ls produces verbatim from user -/// input. Conservative: only strips if the prefix is `bash: ` or `ls: ` -/// AND the line ends with a known real-shell error suffix. +/// Recognize stderr lines that bash, ls, or a uutils clap CLI produces +/// verbatim from user input. Conservative: each branch matches a fixed +/// real-shell error template that quotes input. fn is_real_shell_error_line(line: &str) -> bool { const SHELL_ERROR_SUFFIXES: &[&str] = &[ ": command not found", @@ -176,6 +176,62 @@ fn is_real_shell_error_line(line: &str) -> bool { } return false; } + is_clap_error_chrome_line(line) +} + +/// Recognize the four-line clap CLI error template that uutils builtins +/// (`ls`, `cat`, `truncate`, …) emit when a user passes an unknown flag. +/// Each line quotes the offending argument verbatim, so a fuzz input +/// containing `/rustc/` becomes +/// `error: unexpected argument '--i{fi/rustc/fi{{RRi' found` and trips +/// the banned-shape check even though no internal Debug formatter ran. +/// +/// Patterns are anchored on clap-specific chrome that does not occur in +/// real Debug leaks: the literal `unexpected argument '`, the leading +/// ` tip: to pass '`, the exact `Usage: ` prefix, and the +/// `For more information, try '` help footer. +fn is_clap_error_chrome_line(line: &str) -> bool { + // `error: unexpected argument '' found` + // `error: invalid value '' for '': ` + // `error: the argument '' cannot be used with ''` + // `error: unrecognized subcommand ''` + if let Some(rest) = line.strip_prefix("error: ") { + const CLAP_ERROR_FRAGMENTS: &[&str] = &[ + "unexpected argument '", + "invalid value '", + "the argument '", + "unrecognized subcommand '", + "the following required arguments were not provided", + "a value is required for '", + "equal sign is needed when assigning values to '", + ]; + if CLAP_ERROR_FRAGMENTS.iter().any(|frag| rest.contains(frag)) { + return true; + } + return false; + } + // ` tip: to pass '' as a value, use '-- '` + if let Some(rest) = line.strip_prefix(" tip: ") { + if rest.starts_with("to pass '") && rest.contains("' as a value, use '") { + return true; + } + return false; + } + // `Usage: [OPTION]... [FILE]...` — user input does not appear + // in the Usage line itself, but it follows the error/tip block and + // we strip it for completeness so the leftover stderr is a clean + // signal of real internal leaks. + if let Some(rest) = line.strip_prefix("Usage: ") { + if rest.contains(" [") || rest.ends_with(" --help") || rest.ends_with(" --version") { + return true; + } + return false; + } + // `For more information, try '--help'.` / + // `For more information, try ' --help'.` + if line.starts_with("For more information, try '") && line.ends_with("'.") { + return true; + } false } @@ -302,4 +358,55 @@ mod tests { assert!(!stripped.contains("cannot access")); assert!(stripped.contains("thread panicked")); } + + #[test] + fn strip_removes_clap_unexpected_argument_block() { + // From a real glob_fuzz failure on main: input bytes contained + // `--i{fi/rustc/fi{{RRi`; uutils `ls` (clap) echoed it back in + // its standard four-line error template. The banned shape + // `/rustc/` came from the user input, not an internal formatter. + let s = "error: unexpected argument '--i{fi/rustc/fi{{RRi' found\n\ + \n\ + \x20\x20tip: to pass '--i{fi/rustc/fi{{RRi' as a value, use '-- --i{fi/rustc/fi{{RRi'\n\ + \n\ + Usage: ls [OPTION]... [FILE]...\n\ + \n\ + For more information, try '--help'.\n"; + let stripped = strip_real_shell_error_lines(s); + assert!(!stripped.contains("/rustc/"), "stripped: {stripped:?}"); + assert!( + !stripped.contains("unexpected argument"), + "stripped: {stripped:?}" + ); + assert!(!stripped.contains("Usage:"), "stripped: {stripped:?}"); + assert!( + !stripped.contains("For more information"), + "stripped: {stripped:?}" + ); + } + + #[test] + fn strip_removes_clap_invalid_value_line() { + let s = "error: invalid value 'Span {abc' for '--width ': not a number\n"; + assert_eq!(strip_real_shell_error_lines(s), ""); + } + + #[test] + fn strip_keeps_unrelated_error_prefix_lines() { + // `error: ` lines that are not clap chrome must remain — e.g. + // a real internal error with a Debug shape glued on. + let s = "error: parser failed: Tok::Ident\n"; + let stripped = strip_real_shell_error_lines(s); + assert!(stripped.contains("Tok::"), "stripped: {stripped:?}"); + } + + #[test] + fn strip_keeps_usage_lookalikes() { + // A line that begins with `Usage:` but lacks the clap shape + // (no bracketed option block, no --help/--version trailer) + // must stay — defense against masking real leaks. + let s = "Usage: see Span { for details\n"; + let stripped = strip_real_shell_error_lines(s); + assert!(stripped.contains("Span {"), "stripped: {stripped:?}"); + } } diff --git a/specs/threat-model.md b/specs/threat-model.md index a7a87828..60318b0b 100644 --- a/specs/threat-model.md +++ b/specs/threat-model.md @@ -470,20 +470,26 @@ machinery (`bashkit::testing::assert_fuzz_invariants`): trips this. Fuzz/proptest targets inline arbitrary input bytes into shell scripts, -so bash and ls produce error messages that quote the input verbatim -(`bash: : command not found`, `bash: : No such file or -directory`, `ls: cannot access '': …`). Those echoes can -accidentally form a banned substring — e.g. user input `Tok"` becomes -the command name `Tok:`, and bash's `bash: %s: command not found` -formatter renders it as `bash: Tok:: command not found`, matching the -parser-token shape `Tok::`. They are not internal Debug leaks. To keep -the leak detector strict on real internals while suppressing this class -of false positive, `assert_fuzz_invariants` strips lines that match a -recognized real-shell error template before the banned-shape check; -the byte-length cap and the host-canary check still run on the -unfiltered stderr so flood and TM-INF-013 regressions are still -caught. The strict per-builtin path (`assert_no_leak`) is unchanged — -non-fuzz tests must not produce shell echoes in the first place. +so bash, ls, and uutils clap CLIs produce error messages that quote the +input verbatim (`bash: : command not found`, `bash: : No such +file or directory`, `ls: cannot access '': …`, +`error: unexpected argument '' found`, +` tip: to pass '' as a value, use '-- '`, +`Usage: ls [OPTION]... [FILE]...`, +`For more information, try '--help'.`). Those echoes can accidentally +form a banned substring — e.g. user input `Tok"` becomes the command +name `Tok:`, and bash's `bash: %s: command not found` formatter renders +it as `bash: Tok:: command not found`, matching the parser-token shape +`Tok::`; or user input `--i{fi/rustc/fi{{RRi` lands in clap's +`error: unexpected argument '…' found` chrome and trips the `/rustc/` +host-path check. They are not internal Debug leaks. To keep the leak +detector strict on real internals while suppressing this class of false +positive, `assert_fuzz_invariants` strips lines that match a recognized +real-shell or clap error template before the banned-shape check; the +byte-length cap and the host-canary check still run on the unfiltered +stderr so flood and TM-INF-013 regressions are still caught. The strict +per-builtin path (`assert_no_leak`) is unchanged — non-fuzz tests must +not produce shell echoes in the first place. **TM-INF-013**: The jq builtin previously called `std::env::set_var()` to expose shell variables to jaq's `env` function. This also made host process env vars (API keys, tokens)