From d6d2d6cd848f86040414ba35d7b245242a0aa595 Mon Sep 17 00:00:00 2001 From: Mykhailo Chalyi Date: Tue, 12 May 2026 09:25:45 +0000 Subject: [PATCH] fix(coreutils-port): accept localized-Command let-binding in uu_app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Yesterday's `Coreutils Args Drift` workflow on main went red at the `regenerating truncate` step: error: unsafe uu_app body: expected a single clap Command builder expression, found 2 statements Upstream `truncate` (rev 94b06d97b) now routes the Command through `uucore::clap_localization::configure_localized_command(cmd)`, so the body has the shape pub fn uu_app() -> Command { let cmd = Command::new("truncate")…; uucore::clap_localization::configure_localized_command(cmd) .arg(…)… } The rewriter elides the localization helper (it pulls in Fluent; bashkit doesn't need it), leaving the body as a 2-stmt `let cmd = …; cmd.arg(…).arg(…)` form. PR #1617 had tightened the validator to a single expression, which is what this case trips. ## Fix Extend `validate_uu_app_body` to also accept the 2-stmt shape [Stmt::Local(local), Stmt::Expr(tail, None)] with strict guards so the safety story does not weaken: - The `let` pattern must be a plain `Pat::Ident` — no `mut`, no `ref`, no subpattern, no type ascription, no `let … else …`. - The initializer must pass the same per-expression validator (no closures, loops, match, blocks, unsafe, async; only allowlisted macros; only allowlisted method names) AND chain-root at `Command::new(...)`, identical to the single-expression path. - The tail must pass the per-expression validator AND its receiver chain must bottom out at exactly the binder identifier — not at another Command::new, not at a function call, not at a different variable. This proves the tail is just continuing the same builder chain. Anything richer (tuple patterns, refutable matches, type ascription, mutability, an extra prefix statement) still fails, keeping the TM-INF-025 boundary closed. ## Test plan - [x] 5 new tests in `args::tests` cover both directions: positive (`accepts_localized_command_let_binding` mirroring truncate's exact shape, asserting the wrapper is elided in the emitted body and the tail starts at `cmd.arg(…)`); negative (`rejects_let_mut_binding…`, `rejects_three_statement_uu_app`, `rejects_tail_rooted_at_other_ident`, `rejects_let_init_not_rooted_at_command_new`). - [x] `cargo test -p bashkit-coreutils-port` green (32 + 5 → 37 tests). - [x] `cargo build -p bashkit` green (existing generated truncate_args.rs already uses this shape; this PR only re-enables the codegen path that produced it). - [x] `cargo clippy -p bashkit-coreutils-port --all-targets -- -D warnings` clean. - [x] `cargo fmt --check` clean. --- crates/bashkit-coreutils-port/src/args.rs | 268 +++++++++++++++++++++- 1 file changed, 261 insertions(+), 7 deletions(-) diff --git a/crates/bashkit-coreutils-port/src/args.rs b/crates/bashkit-coreutils-port/src/args.rs index 165bf358..6eaeb02d 100644 --- a/crates/bashkit-coreutils-port/src/args.rs +++ b/crates/bashkit-coreutils-port/src/args.rs @@ -645,28 +645,128 @@ fn collect_option_constants(file: &syn::File) -> Vec { // THREAT[TM-INF-025]: args-mode consumes third-party Rust from uutils. // Only a single clap builder expression may become executable bashkit // code; prefix statements would run during CI/parser construction. +// +// Two body shapes are accepted: +// +// 1. A single tail expression — the original shape: +// Command::new("foo").arg(...).arg(...) +// +// 2. A `let cmd = ;` binding followed by a tail +// expression that resumes the chain at that binder: +// let cmd = Command::new("foo").about(...); +// cmd.arg(...).arg(...) +// +// Upstream uutils introduces shape (2) for utilities (e.g. +// `truncate`) that route the Command through +// `uucore::clap_localization::configure_localized_command(cmd)`. +// The rewriter elides that call (the wrapper is unused by bashkit), +// leaving the body in the 2-stmt let-binding form which is still +// semantically a pure builder chain. The validator must accept it +// without weakening the per-expression safety checks. fn validate_uu_app_body(uu_app: &ItemFn) -> Result<()> { - let command_expr = match uu_app.block.stmts.as_slice() { - [Stmt::Expr(expr, None)] => expr, + match uu_app.block.stmts.as_slice() { + [Stmt::Expr(expr, None)] => validate_uu_app_expr_command_root(expr), + [Stmt::Local(local), Stmt::Expr(tail, None)] => validate_uu_app_let_binding(local, tail), stmts => bail!( - "unsafe uu_app body: expected a single clap Command builder expression, found {} statements", + "unsafe uu_app body: expected either a single clap Command builder \ + expression or a `let cmd = …;` binding followed by a builder \ + tail, found {} statements", stmts.len() ), - }; + } +} + +/// Run the expression-level safety checks (no closures, loops, match, +/// blocks, unsafe, async; only allowlisted macros; only allowlisted +/// method names) AND assert that the receiver chain bottoms out at +/// `Command::new(...)`. Used for the single-stmt body and for the +/// `let` initializer in the 2-stmt body. +fn validate_uu_app_expr_command_root(expr: &Expr) -> Result<()> { + validate_uu_app_expr_no_root_check(expr)?; + if !chain_roots_at_command_new(expr) { + bail!( + "unsafe uu_app body: tail expression must be a clap::Command::new(...) builder chain" + ); + } + Ok(()) +} +fn validate_uu_app_expr_no_root_check(expr: &Expr) -> Result<()> { let mut validator = UuAppExprValidator { errors: vec![] }; - syn::visit::Visit::visit_expr(&mut validator, command_expr); + syn::visit::Visit::visit_expr(&mut validator, expr); if !validator.errors.is_empty() { bail!("unsafe uu_app body: {}", validator.errors.join("; ")); } - if !chain_roots_at_command_new(command_expr) { + Ok(()) +} + +/// Validate the 2-stmt `let cmd = …; cmd.arg(...).arg(...)` shape. +/// +/// Constraints: +/// - The local must be `let = ;` — no `mut`, no `ref`, +/// no type annotation, no `else` branch, no subpattern. Anything +/// richer (tuple patterns, type ascription, refutable matches) opens +/// a hole the validator was designed to close. +/// - The initializer must itself pass the single-expression checks and +/// root at `Command::new(...)`. +/// - The tail must pass the expression-level checks AND its receiver +/// chain must bottom out at exactly the binder identifier — not at +/// another Command::new, not at a function call, not at a different +/// variable. This proves the tail is just continuing the builder +/// chain begun on the previous line. +fn validate_uu_app_let_binding(local: &syn::Local, tail: &Expr) -> Result<()> { + if local.attrs.iter().any(|a| !a.path().is_ident("doc")) { + bail!("unsafe uu_app body: `let` binding must not carry non-doc attributes"); + } + let Some(init) = &local.init else { + bail!("unsafe uu_app body: `let` binding must have an initializer"); + }; + if init.diverge.is_some() { + bail!("unsafe uu_app body: `let … else …` is not allowed"); + } + let binder_ident = match &local.pat { + syn::Pat::Ident(pat) => { + if pat.by_ref.is_some() || pat.mutability.is_some() || pat.subpat.is_some() { + bail!( + "unsafe uu_app body: `let` binding must be a plain `let = …;` \ + (no `mut`, no `ref`, no subpattern)" + ); + } + &pat.ident + } + _ => bail!( + "unsafe uu_app body: `let` binding must bind a single identifier, \ + not a destructuring pattern" + ), + }; + validate_uu_app_expr_command_root(&init.expr)?; + validate_uu_app_expr_no_root_check(tail)?; + if !chain_roots_at_ident(tail, binder_ident) { bail!( - "unsafe uu_app body: tail expression must be a clap::Command::new(...) builder chain" + "unsafe uu_app body: tail expression must continue the chain from \ + the `let` binder `{}` (e.g. `{}.arg(…).arg(…)`)", + binder_ident, + binder_ident, ); } Ok(()) } +/// Like `chain_roots_at_command_new`, but bottoms out at an `Expr::Path` +/// that names a single identifier matching `expected`. Used by the +/// 2-stmt validator to prove the tail's receiver chain begins at the +/// `let` binder rather than at some arbitrary expression. +fn chain_roots_at_ident(mut expr: &Expr, expected: &syn::Ident) -> bool { + while let Expr::MethodCall(mc) = expr { + expr = &mc.receiver; + } + let Expr::Path(p) = expr else { + return false; + }; + let segs = path_segments(&p.path); + matches!(segs.as_slice(), [single] if single == &expected.to_string()) +} + struct UuAppExprValidator { errors: Vec, } @@ -1203,4 +1303,158 @@ pub fn uu_app() -> clap::Command { let msg = format!("{err:#}"); assert!(msg.contains("Command::new"), "got: {msg}"); } + + #[test] + fn accepts_localized_command_let_binding() { + // Mirrors the upstream `truncate` shape: a `let cmd = Command::new(...)…` + // binding wrapped by `uucore::clap_localization::configure_localized_command(cmd)`. + // The rewriter unwraps the helper to bare `cmd`, leaving a 2-stmt + // body the validator must accept. + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = Command::new("cat") + .version(uucore::crate_version!()) + .about("concat") + .infer_long_args(true); + uucore::clap_localization::configure_localized_command(cmd) + .arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let body = run(&uutils, "cat", "poc").unwrap(); + assert!( + body.contains("pub fn cat_command() -> clap::Command"), + "got: {body}" + ); + assert!(body.contains("Command::new(\"cat\")"), "got: {body}"); + assert!( + body.contains("cmd.arg(Arg::new(options::FILE))"), + "got: {body}" + ); + // The localization wrapper must have been elided — the emitted + // tail starts at the `cmd` binder, not at the helper call. + assert!( + !body.contains("configure_localized_command"), + "wrapper must be elided, got: {body}" + ); + } + + #[test] + fn rejects_let_mut_binding_in_uu_app() { + // `let mut cmd = …;` opens room for reassignment in the body; + // the validator's no-mutation guarantee rests on the binding + // being immutable. + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let mut cmd = Command::new("cat"); + cmd.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("no `mut`"), "got: {msg}"); + } + + #[test] + fn rejects_three_statement_uu_app() { + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = Command::new("cat"); + let _ = std::fs::write("poc", b"owned"); + cmd.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("found 3 statements"), "got: {msg}"); + } + + #[test] + fn rejects_tail_rooted_at_other_ident() { + // Tail must continue from the `let` binder; rooting at a + // different ident (here `other`) would let attacker code + // smuggle in a Command from any in-scope identifier. + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = Command::new("cat"); + other.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!( + msg.contains("continue the chain from the `let` binder"), + "got: {msg}" + ); + } + + #[test] + fn rejects_let_init_not_rooted_at_command_new() { + // Initializer of the `let` must itself bottom out at + // `Command::new(...)`. Otherwise an attacker could bind + // anything (e.g. `some::call()`) and chain `.arg(…)` on it. + let (_tmp, uutils) = fixture(&[ + ( + "src/uu/cat/src/cat.rs", + r#" +mod options { + pub static FILE: &str = "file"; +} + +pub fn uu_app() -> clap::Command { + let cmd = some::factory(); + cmd.arg(Arg::new(options::FILE)) +} +"#, + ), + ("src/uu/cat/locales/en-US.ftl", ""), + ]); + + let err = run(&uutils, "cat", "poc").unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("Command::new"), "got: {msg}"); + } }