Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 261 additions & 7 deletions crates/bashkit-coreutils-port/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -645,28 +645,128 @@ fn collect_option_constants(file: &syn::File) -> Vec<Item> {
// THREAT[TM-INF-025]: args-mode consumes third-party Rust from uutils.
// Only a single clap builder expression may become executable bashkit
// code; prefix statements would run during CI/parser construction.
//
// Two body shapes are accepted:
//
// 1. A single tail expression — the original shape:
// Command::new("foo").arg(...).arg(...)
//
// 2. A `let cmd = <Command::new chain>;` binding followed by a tail
// expression that resumes the chain at that binder:
// let cmd = Command::new("foo").about(...);
// cmd.arg(...).arg(...)
//
// Upstream uutils introduces shape (2) for utilities (e.g.
// `truncate`) that route the Command through
// `uucore::clap_localization::configure_localized_command(cmd)`.
// The rewriter elides that call (the wrapper is unused by bashkit),
// leaving the body in the 2-stmt let-binding form which is still
// semantically a pure builder chain. The validator must accept it
// without weakening the per-expression safety checks.
fn validate_uu_app_body(uu_app: &ItemFn) -> Result<()> {
let command_expr = match uu_app.block.stmts.as_slice() {
[Stmt::Expr(expr, None)] => expr,
match uu_app.block.stmts.as_slice() {
[Stmt::Expr(expr, None)] => validate_uu_app_expr_command_root(expr),
[Stmt::Local(local), Stmt::Expr(tail, None)] => validate_uu_app_let_binding(local, tail),
stmts => bail!(
"unsafe uu_app body: expected a single clap Command builder expression, found {} statements",
"unsafe uu_app body: expected either a single clap Command builder \
expression or a `let cmd = …;` binding followed by a builder \
tail, found {} statements",
stmts.len()
),
};
}
}

/// Run the expression-level safety checks (no closures, loops, match,
/// blocks, unsafe, async; only allowlisted macros; only allowlisted
/// method names) AND assert that the receiver chain bottoms out at
/// `Command::new(...)`. Used for the single-stmt body and for the
/// `let` initializer in the 2-stmt body.
fn validate_uu_app_expr_command_root(expr: &Expr) -> Result<()> {
validate_uu_app_expr_no_root_check(expr)?;
if !chain_roots_at_command_new(expr) {
bail!(
"unsafe uu_app body: tail expression must be a clap::Command::new(...) builder chain"
);
}
Ok(())
}

fn validate_uu_app_expr_no_root_check(expr: &Expr) -> Result<()> {
let mut validator = UuAppExprValidator { errors: vec![] };
syn::visit::Visit::visit_expr(&mut validator, command_expr);
syn::visit::Visit::visit_expr(&mut validator, expr);
if !validator.errors.is_empty() {
bail!("unsafe uu_app body: {}", validator.errors.join("; "));
}
if !chain_roots_at_command_new(command_expr) {
Ok(())
}

/// Validate the 2-stmt `let cmd = …; cmd.arg(...).arg(...)` shape.
///
/// Constraints:
/// - The local must be `let <ident> = <init>;` — no `mut`, no `ref`,
/// no type annotation, no `else` branch, no subpattern. Anything
/// richer (tuple patterns, type ascription, refutable matches) opens
/// a hole the validator was designed to close.
/// - The initializer must itself pass the single-expression checks and
/// root at `Command::new(...)`.
/// - The tail must pass the expression-level checks AND its receiver
/// chain must bottom out at exactly the binder identifier — not at
/// another Command::new, not at a function call, not at a different
/// variable. This proves the tail is just continuing the builder
/// chain begun on the previous line.
fn validate_uu_app_let_binding(local: &syn::Local, tail: &Expr) -> Result<()> {
if local.attrs.iter().any(|a| !a.path().is_ident("doc")) {
bail!("unsafe uu_app body: `let` binding must not carry non-doc attributes");
}
let Some(init) = &local.init else {
bail!("unsafe uu_app body: `let` binding must have an initializer");
};
if init.diverge.is_some() {
bail!("unsafe uu_app body: `let … else …` is not allowed");
}
let binder_ident = match &local.pat {
syn::Pat::Ident(pat) => {
if pat.by_ref.is_some() || pat.mutability.is_some() || pat.subpat.is_some() {
bail!(
"unsafe uu_app body: `let` binding must be a plain `let <ident> = …;` \
(no `mut`, no `ref`, no subpattern)"
);
}
&pat.ident
}
_ => bail!(
"unsafe uu_app body: `let` binding must bind a single identifier, \
not a destructuring pattern"
),
};
validate_uu_app_expr_command_root(&init.expr)?;
validate_uu_app_expr_no_root_check(tail)?;
if !chain_roots_at_ident(tail, binder_ident) {
bail!(
"unsafe uu_app body: tail expression must be a clap::Command::new(...) builder chain"
"unsafe uu_app body: tail expression must continue the chain from \
the `let` binder `{}` (e.g. `{}.arg(…).arg(…)`)",
binder_ident,
binder_ident,
);
}
Ok(())
}

/// Like `chain_roots_at_command_new`, but bottoms out at an `Expr::Path`
/// that names a single identifier matching `expected`. Used by the
/// 2-stmt validator to prove the tail's receiver chain begins at the
/// `let` binder rather than at some arbitrary expression.
fn chain_roots_at_ident(mut expr: &Expr, expected: &syn::Ident) -> bool {
while let Expr::MethodCall(mc) = expr {
expr = &mc.receiver;
}
let Expr::Path(p) = expr else {
return false;
};
let segs = path_segments(&p.path);
matches!(segs.as_slice(), [single] if single == &expected.to_string())
}

struct UuAppExprValidator {
errors: Vec<String>,
}
Expand Down Expand Up @@ -1203,4 +1303,158 @@ pub fn uu_app() -> clap::Command {
let msg = format!("{err:#}");
assert!(msg.contains("Command::new"), "got: {msg}");
}

#[test]
fn accepts_localized_command_let_binding() {
// Mirrors the upstream `truncate` shape: a `let cmd = Command::new(...)…`
// binding wrapped by `uucore::clap_localization::configure_localized_command(cmd)`.
// The rewriter unwraps the helper to bare `cmd`, leaving a 2-stmt
// body the validator must accept.
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let cmd = Command::new("cat")
.version(uucore::crate_version!())
.about("concat")
.infer_long_args(true);
uucore::clap_localization::configure_localized_command(cmd)
.arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let body = run(&uutils, "cat", "poc").unwrap();
assert!(
body.contains("pub fn cat_command() -> clap::Command"),
"got: {body}"
);
assert!(body.contains("Command::new(\"cat\")"), "got: {body}");
assert!(
body.contains("cmd.arg(Arg::new(options::FILE))"),
"got: {body}"
);
// The localization wrapper must have been elided — the emitted
// tail starts at the `cmd` binder, not at the helper call.
assert!(
!body.contains("configure_localized_command"),
"wrapper must be elided, got: {body}"
);
}

#[test]
fn rejects_let_mut_binding_in_uu_app() {
// `let mut cmd = …;` opens room for reassignment in the body;
// the validator's no-mutation guarantee rests on the binding
// being immutable.
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let mut cmd = Command::new("cat");
cmd.arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("no `mut`"), "got: {msg}");
}

#[test]
fn rejects_three_statement_uu_app() {
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let cmd = Command::new("cat");
let _ = std::fs::write("poc", b"owned");
cmd.arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("found 3 statements"), "got: {msg}");
}

#[test]
fn rejects_tail_rooted_at_other_ident() {
// Tail must continue from the `let` binder; rooting at a
// different ident (here `other`) would let attacker code
// smuggle in a Command from any in-scope identifier.
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let cmd = Command::new("cat");
other.arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(
msg.contains("continue the chain from the `let` binder"),
"got: {msg}"
);
}

#[test]
fn rejects_let_init_not_rooted_at_command_new() {
// Initializer of the `let` must itself bottom out at
// `Command::new(...)`. Otherwise an attacker could bind
// anything (e.g. `some::call()`) and chain `.arg(…)` on it.
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let cmd = some::factory();
cmd.arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("Command::new"), "got: {msg}");
}
}
Loading