diff --git a/crates/bashkit-coreutils-port/src/args.rs b/crates/bashkit-coreutils-port/src/args.rs index 165bf358..6eaeb02d 100644 --- a/crates/bashkit-coreutils-port/src/args.rs +++ b/crates/bashkit-coreutils-port/src/args.rs @@ -645,28 +645,128 @@ fn collect_option_constants(file: &syn::File) -> Vec { // THREAT[TM-INF-025]: args-mode consumes third-party Rust from uutils. // Only a single clap builder expression may become executable bashkit // code; prefix statements would run during CI/parser construction. +// +// Two body shapes are accepted: +// +// 1. A single tail expression — the original shape: +// Command::new("foo").arg(...).arg(...) +// +// 2. A `let cmd = ;` binding followed by a tail +// expression that resumes the chain at that binder: +// let cmd = Command::new("foo").about(...); +// cmd.arg(...).arg(...) +// +// Upstream uutils introduces shape (2) for utilities (e.g. +// `truncate`) that route the Command through +// `uucore::clap_localization::configure_localized_command(cmd)`. +// The rewriter elides that call (the wrapper is unused by bashkit), +// leaving the body in the 2-stmt let-binding form which is still +// semantically a pure builder chain. The validator must accept it +// without weakening the per-expression safety checks. fn validate_uu_app_body(uu_app: &ItemFn) -> Result<()> { - let command_expr = match uu_app.block.stmts.as_slice() { - [Stmt::Expr(expr, None)] => expr, + match uu_app.block.stmts.as_slice() { + [Stmt::Expr(expr, None)] => validate_uu_app_expr_command_root(expr), + [Stmt::Local(local), Stmt::Expr(tail, None)] => validate_uu_app_let_binding(local, tail), stmts => bail!( - "unsafe uu_app body: expected a single clap Command builder expression, found {} statements", + "unsafe uu_app body: expected either a single clap Command builder \ + expression or a `let cmd = …;` binding followed by a builder \ + tail, found {} statements", stmts.len() ), - }; + } +} + +/// Run the expression-level safety checks (no closures, loops, match, +/// blocks, unsafe, async; only allowlisted macros; only allowlisted +/// method names) AND assert that the receiver chain bottoms out at +/// `Command::new(...)`. Used for the single-stmt body and for the +/// `let` initializer in the 2-stmt body. +fn validate_uu_app_expr_command_root(expr: &Expr) -> Result<()> { + validate_uu_app_expr_no_root_check(expr)?; + if !chain_roots_at_command_new(expr) { + bail!( + "unsafe uu_app body: tail expression must be a clap::Command::new(...) builder chain" + ); + } + Ok(()) +} +fn validate_uu_app_expr_no_root_check(expr: &Expr) -> Result<()> { let mut validator = UuAppExprValidator { errors: vec![] }; - syn::visit::Visit::visit_expr(&mut validator, command_expr); + syn::visit::Visit::visit_expr(&mut validator, expr); if !validator.errors.is_empty() { bail!("unsafe uu_app body: {}", validator.errors.join("; ")); } - if !chain_roots_at_command_new(command_expr) { + Ok(()) +} + +/// Validate the 2-stmt `let cmd = …; cmd.arg(...).arg(...)` shape. +/// +/// Constraints: +/// - The local must be `let = ;` — no `mut`, no `ref`, +/// no type annotation, no `else` branch, no subpattern. Anything +/// richer (tuple patterns, type ascription, refutable matches) opens +/// a hole the validator was designed to close. +/// - The initializer must itself pass the single-expression checks and +/// root at `Command::new(...)`. +/// - The tail must pass the expression-level checks AND its receiver +/// chain must bottom out at exactly the binder identifier — not at +/// another Command::new, not at a function call, not at a different +/// variable. This proves the tail is just continuing the builder +/// chain begun on the previous line. +fn validate_uu_app_let_binding(local: &syn::Local, tail: &Expr) -> Result<()> { + if local.attrs.iter().any(|a| !a.path().is_ident("doc")) { + bail!("unsafe uu_app body: `let` binding must not carry non-doc attributes"); + } + let Some(init) = &local.init else { + bail!("unsafe uu_app body: `let` binding must have an initializer"); + }; + if init.diverge.is_some() { + bail!("unsafe uu_app body: `let … else …` is not allowed"); + } + let binder_ident = match &local.pat { + syn::Pat::Ident(pat) => { + if pat.by_ref.is_some() || pat.mutability.is_some() || pat.subpat.is_some() { + bail!( + "unsafe uu_app body: `let` binding must be a plain `let = …;` \ + (no `mut`, no `ref`, no subpattern)" + ); + } + &pat.ident + } + _ => bail!( + "unsafe uu_app body: `let` binding must bind a single identifier, \ + not a destructuring pattern" + ), + }; + validate_uu_app_expr_command_root(&init.expr)?; + validate_uu_app_expr_no_root_check(tail)?; + if !chain_roots_at_ident(tail, binder_ident) { bail!( - "unsafe uu_app body: tail expression must be a clap::Command::new(...) builder chain" + "unsafe uu_app body: tail expression must continue the chain from \ + the `let` binder `{}` (e.g. `{}.arg(…).arg(…)`)", + binder_ident, + binder_ident, ); } Ok(()) } +/// Like `chain_roots_at_command_new`, but bottoms out at an `Expr::Path` +/// that names a single identifier matching `expected`. Used by the +/// 2-stmt validator to prove the tail's receiver chain begins at the +/// `let` binder rather than at some arbitrary expression. +fn chain_roots_at_ident(mut expr: &Expr, expected: &syn::Ident) -> bool { + while let Expr::MethodCall(mc) = expr { + expr = &mc.receiver; + } + let Expr::Path(p) = expr else { + return false; + }; + let segs = path_segments(&p.path); + matches!(segs.as_slice(), [single] if single == &expected.to_string()) +} + struct UuAppExprValidator { errors: Vec, } @@ -1203,4 +1303,158 @@ pub fn uu_app() -> clap::Command { let msg = format!("{err:#}"); assert!(msg.contains("Command::new"), "got: {msg}"); } + + #[test] + fn accepts_localized_command_let_binding() { + // Mirrors the upstream `truncate` shape: a `let cmd = Command::new(...)…` + // binding wrapped by `uucore::clap_localization::configure_localized_command(cmd)`. + // The rewriter unwraps the helper to bare `cmd`, leaving a 2-stmt + // body the validator must accept. + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = Command::new("cat") + .version(uucore::crate_version!()) + .about("concat") + .infer_long_args(true); + uucore::clap_localization::configure_localized_command(cmd) + .arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let body = run(&uutils, "cat", "poc").unwrap(); + assert!( + body.contains("pub fn cat_command() -> clap::Command"), + "got: {body}" + ); + assert!(body.contains("Command::new(\"cat\")"), "got: {body}"); + assert!( + body.contains("cmd.arg(Arg::new(options::FILE))"), + "got: {body}" + ); + // The localization wrapper must have been elided — the emitted + // tail starts at the `cmd` binder, not at the helper call. + assert!( + !body.contains("configure_localized_command"), + "wrapper must be elided, got: {body}" + ); + } + + #[test] + fn rejects_let_mut_binding_in_uu_app() { + // `let mut cmd = …;` opens room for reassignment in the body; + // the validator's no-mutation guarantee rests on the binding + // being immutable. + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let mut cmd = Command::new("cat"); + cmd.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("no `mut`"), "got: {msg}"); + } + + #[test] + fn rejects_three_statement_uu_app() { + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = Command::new("cat"); + let _ = std::fs::write("poc", b"owned"); + cmd.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("found 3 statements"), "got: {msg}"); + } + + #[test] + fn rejects_tail_rooted_at_other_ident() { + // Tail must continue from the `let` binder; rooting at a + // different ident (here `other`) would let attacker code + // smuggle in a Command from any in-scope identifier. + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = Command::new("cat"); + other.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!( + msg.contains("continue the chain from the `let` binder"), + "got: {msg}" + ); + } + + #[test] + fn rejects_let_init_not_rooted_at_command_new() { + // Initializer of the `let` must itself bottom out at + // `Command::new(...)`. Otherwise an attacker could bind + // anything (e.g. `some::call()`) and chain `.arg(…)` on it. + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = some::factory(); + cmd.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("Command::new"), "got: {msg}"); + } }