Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 212 additions & 8 deletions crates/bashkit-coreutils-port/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -643,30 +643,122 @@ fn collect_option_constants(file: &syn::File) -> Vec<Item> {
}

// THREAT[TM-INF-025]: args-mode consumes third-party Rust from uutils.
// Only a single clap builder expression may become executable bashkit
// code; prefix statements would run during CI/parser construction.
// Only clap builder expressions may become executable bashkit code;
// any other statement shape would run during CI/parser construction.
//
// Two accepted body shapes:
// 1. `<Command::new(...) chain>` (single tail expression)
// 2. `let <ident> = <Command::new(...) chain>;
// <ident>.<builder method>(...)...`
// — equivalent to a single chain split across a binding. The
// tail's innermost receiver must be the let-bound identifier;
// both halves run through the same chain validator.
fn validate_uu_app_body(uu_app: &ItemFn) -> Result<()> {
let command_expr = match uu_app.block.stmts.as_slice() {
[Stmt::Expr(expr, None)] => expr,
match uu_app.block.stmts.as_slice() {
[Stmt::Expr(expr, None)] => validate_command_chain_expr(expr),
[Stmt::Local(local), Stmt::Expr(tail_expr, None)] => {
validate_let_bound_command_body(local, tail_expr)
}
stmts => bail!(
"unsafe uu_app body: expected a single clap Command builder expression, found {} statements",
"unsafe uu_app body: expected a single clap Command builder expression \
or `let <ident> = Command::new(...)...; <ident>.method(...)...`, \
found {} statements",
stmts.len()
),
};
}
}

fn validate_command_chain_expr(expr: &Expr) -> Result<()> {
let mut validator = UuAppExprValidator { errors: vec![] };
syn::visit::Visit::visit_expr(&mut validator, command_expr);
syn::visit::Visit::visit_expr(&mut validator, expr);
if !validator.errors.is_empty() {
bail!("unsafe uu_app body: {}", validator.errors.join("; "));
}
if !chain_roots_at_command_new(command_expr) {
if !chain_roots_at_command_new(expr) {
bail!(
"unsafe uu_app body: tail expression must be a clap::Command::new(...) builder chain"
);
}
Ok(())
}

fn validate_let_bound_command_body(local: &syn::Local, tail_expr: &Expr) -> Result<()> {
let binding_ident = simple_let_ident(local).ok_or_else(|| {
anyhow!(
"unsafe uu_app body: let binding must be `let <ident> = ...` \
(no destructuring, no let-else)"
)
})?;
let init = local
.init
.as_ref()
.ok_or_else(|| anyhow!("unsafe uu_app body: let binding must have an initializer"))?;
if init.diverge.is_some() {
bail!("unsafe uu_app body: let-else is not allowed");
}
// The bound expression must itself be a Command::new(...) builder
// chain so the only thing the binding can hold is a clap Command.
validate_command_chain_expr(&init.expr)?;

// The tail must be a method chain whose innermost receiver is the
// let-bound identifier — i.e. structurally a continuation of the
// builder chain.
let mut validator = UuAppExprValidator { errors: vec![] };
syn::visit::Visit::visit_expr(&mut validator, tail_expr);
if !validator.errors.is_empty() {
bail!("unsafe uu_app body: {}", validator.errors.join("; "));
}
if !tail_chains_back_to_ident(tail_expr, binding_ident) {
bail!(
"unsafe uu_app body: tail expression must be a `{}.method(...)...` \
chain on the let binding",
binding_ident
);
}
Ok(())
}

/// Returns the bound identifier of a plain `let <ident> [: T] = ...;`.
/// Rejects destructuring, `ref`, subpatterns, or attributes — anything
/// that could hide an active expression inside the binding pattern.
fn simple_let_ident(local: &syn::Local) -> Option<&syn::Ident> {
let pat = match &local.pat {
syn::Pat::Type(pt) => &*pt.pat,
other => other,
};
match pat {
syn::Pat::Ident(pi)
if pi.attrs.is_empty() && pi.by_ref.is_none() && pi.subpat.is_none() =>
{
Some(&pi.ident)
}
_ => None,
}
}

/// Walk down method-call receivers and require the innermost expression
/// to be a bare reference to `target` — i.e. the tail is shaped like
/// `target.foo(...).bar(...)`. Anything else (a fresh `Command::new`,
/// a different binding, a function call) is rejected.
fn tail_chains_back_to_ident(expr: &Expr, target: &syn::Ident) -> bool {
let mut cur = expr;
loop {
match cur {
Expr::MethodCall(mc) => cur = &mc.receiver,
Expr::Path(p)
if p.qself.is_none()
&& p.path.leading_colon.is_none()
&& p.path.segments.len() == 1
&& p.path.segments[0].ident == *target
&& matches!(p.path.segments[0].arguments, syn::PathArguments::None) =>
{
return true;
}
_ => return false,
}
}
}

struct UuAppExprValidator {
errors: Vec<String>,
}
Expand Down Expand Up @@ -1203,4 +1295,116 @@ pub fn uu_app() -> clap::Command {
let msg = format!("{err:#}");
assert!(msg.contains("Command::new"), "got: {msg}");
}

#[test]
fn accepts_let_bound_command_with_tail_chain() {
// Upstream uutils truncate now splits uu_app() into
// `let cmd = Command::new(...)...;
// configure_localized_command(cmd).arg(...)...`.
// The Rewriter folds `configure_localized_command(cmd)` to `cmd`,
// leaving exactly the `let`-then-tail shape the validator must accept.
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let cmd = Command::new("cat")
.version(uucore::crate_version!())
.infer_long_args(true);
uucore::clap_localization::configure_localized_command(cmd)
.arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let body = run(&uutils, "cat", "poc").unwrap();
assert!(body.contains("pub fn cat_command() -> clap::Command"));
assert!(body.contains("Command::new(\"cat\")"));
// The configure_localized_command(cmd) wrapper is folded away;
// only `cmd.arg(...)` remains.
assert!(
!body.contains("configure_localized_command"),
"wrapper should be stripped: {body}"
);
assert!(body.contains("cmd.arg("), "got: {body}");
}

#[test]
fn rejects_let_with_non_command_initializer() {
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let cmd = std::process::Command::new("sh").arg("-c").arg("nope").status().unwrap();
Command::new("cat").arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("unsafe uu_app"), "got: {msg}");
}

#[test]
fn rejects_tail_not_chained_off_let_binding() {
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let cmd = Command::new("cat");
Command::new("cat").arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("chain on the let binding"), "got: {msg}");
}

#[test]
fn rejects_three_statement_body() {
let (_tmp, uutils) = fixture(&[
(
"src/uu/cat/src/cat.rs",
r#"
mod options {
pub static FILE: &str = "file";
}

pub fn uu_app() -> clap::Command {
let cmd = Command::new("cat");
std::process::abort();
cmd.arg(Arg::new(options::FILE))
}
"#,
),
("src/uu/cat/locales/en-US.ftl", ""),
]);

let err = run(&uutils, "cat", "poc").unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("found 3 statements"), "got: {msg}");
}
}
16 changes: 11 additions & 5 deletions specs/coreutils-args-port.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,17 @@ codegen**, not by depending on `uu_*` crates at runtime.
through the shim — if it does, every uutils env-default
auto-lights as that option's bashkit support lands.
5. Validates the rewritten `uu_app()` before emission: args mode accepts
only a single tail clap `Command` builder expression. Prefix
statements, block expressions, loops, matches, async blocks, and
unsafe blocks are rejected before any generated Rust is written.
This keeps third-party uutils source from smuggling arbitrary
executable statements into `<util>_command()` (TM-INF-025).
either a single tail clap `Command` builder expression, or the
two-statement shape `let <ident> = Command::new(...)<chain>;
<ident>.<method>(...)<chain>` where the `let` initializer is itself
a Command::new chain and the tail's innermost receiver is the
let-bound identifier (the same shape that emerges after the Rewriter
folds `uucore::clap_localization::configure_localized_command(cmd)`
to `cmd`). Anything else — additional prefix statements, block
expressions, loops, matches, async blocks, unsafe blocks, or a
destructuring/let-else binding — is rejected before any generated
Rust is written. This keeps third-party uutils source from smuggling
arbitrary executable statements into `<util>_command()` (TM-INF-025).
6. Emits a generated file under
`crates/bashkit/src/builtins/generated/<util>_args.rs` with a clean
`pub fn <util>_command() -> clap::Command`.
Expand Down
2 changes: 1 addition & 1 deletion specs/threat-model.md
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ All execution stays within the virtual interpreter — no OS subprocess is spawn
| TM-INF-022 | Library Debug shapes leak via stderr | Builtins wrapping external libs (jaq, regex, serde_json, semver, chrono, …) format errors with `format!("{:?}", e)` and dump internal struct shapes into stderr — e.g. jq printed `(File { code: "<800 chars of prepended compat-defs source>", path: () }, [("@tsv", Filter(0))])` to LLM agents | Three-layer enforcement, all run by `cargo test`: (1) static — `builtins::tests::no_debug_fmt_in_builtin_source` walks every `crates/bashkit/src/builtins/*.rs` and forbids `{:?}` / `{:#?}` / `{name:?}` (per-line opt-out: `// debug-ok: <reason>`); (2) dynamic per-tool — each tool's `mod tests` calls `bashkit::testing::assert_no_leak` with malformed inputs; (3) fuzz — every cargo-fuzz target plus 5 proptest cases call `bashkit::testing::assert_fuzz_invariants` against random input and check the same invariants on stderr/stdout | **FIXED** |
| TM-INF-023 | jq `halt` / `halt_error` terminate the host process | jaq-std 3.0's `halt(N)` native calls `std::process::exit(exit_code)`. `defs.jq` wraps it with `def halt: halt(0);` and `def halt_error(...): ..., halt(...);`. Untrusted jq filters can invoke `halt` / `halt_error` to exit the entire embedding process, escaping the `ExecResult`-based sandbox boundary and causing process-wide DoS for any caller hosting bashkit. | Strip the upstream `halt` native from `jaq_std::funs::<D>()` and chain in a safe replacement that pops the variable argument and returns `Error::str("halt is disabled in the bashkit sandbox")`. The wrapper defs (`halt`, `halt_error`) still resolve, so callers see a normal jq runtime error (exit 5) instead of the host being killed. Regression tests in `builtins::jq::tests`: `halt_does_not_terminate_host_process`, `halt_with_arg_does_not_terminate_host_process`, `halt_error_does_not_terminate_host_process`. | **FIXED** |
| TM-INF-024 | Host env side-channel via clap `Arg::env(...)` | Clap-based builtins ported from uutils (currently `ls`, with `.env("TABSIZE")` and `.env("TIME_STYLE")` on `uu_app()`) resolve those defaults from `std::env`, not bashkit's `ctx.env`. This breaks the sandbox boundary two ways: (1) presence-probe — a script can tell whether the host process has `TIME_STYLE`/`TABSIZE` set by observing `ls`'s behaviour; (2) availability — a host- or container-set `TIME_STYLE` materialises as a clap value source for an option bashkit hasn't implemented yet, so the unsupported-option gate fires on every plain `ls` and breaks the builtin for unrelated tenants | Four-layer fix: (1) **codegen strips runtime `.env(...)` and harvests it** — `bashkit-coreutils-port` rewrites `.env("FOO")` chained method calls out of every Arg builder it ports AND emits a sidecar `<UTIL>_ENV_DEFAULTS: &[EnvDefault]` table recording each stripped annotation; (2) **virtual-env shim** — `crate::builtins::clap_env::apply_env_defaults` rewrites argv from `ctx.env` (never `std::env`) before `try_get_matches_from`, emulating clap's "argv > env > default" precedence so uutils' env-default UX is preserved without re-opening the host channel; (3) **static guards** — `no_clap_env_in_generated_parsers` forbids runtime `.env(` calls in `generated/*.rs`, and `every_generated_parser_emits_env_defaults_table` enforces every util emits the sidecar (uniform surface, no per-util conditionals); (4) **defence-in-depth** — workspace `clap` dep drops the `env` cargo feature, so a re-introduced `.env(...)` fails to compile rather than silently re-opening the channel. Coverage: ls runtime tests pin both directions — `ls_ignores_host_time_style_and_tabsize` (host `std::env` ignored) and `ls_honors_virtual_env_time_style` (virtual `ctx.env` honoured) | **FIXED** |
| TM-INF-025 | Untrusted generated Rust runs in drift CI with repository write token | The coreutils argument-drift workflow consumes third-party `uutils/coreutils` source, regenerates Rust from `uu_app()`, then builds/tests bashkit. If the generator preserved arbitrary upstream statements and the same job held `contents: write`/`pull-requests: write`, malicious upstream code could execute during parser construction with repository write impact. | Two-layer fix: (1) `bashkit-coreutils-port` validates args-mode `uu_app()` before emission and rejects anything other than a single tail clap `Command` builder expression; regression test `rejects_executable_statements_in_uu_app` proves write+abort payloads fail codegen. (2) `.github/workflows/coreutils-args-drift.yml` separates privilege: regeneration/build/test has only `contents: read` and `persist-credentials: false` on both checkouts; the later `open-pr` job gets write permission but does not build or execute generated Rust, only commits the tested artifact. | **FIXED** |
| TM-INF-025 | Untrusted generated Rust runs in drift CI with repository write token | The coreutils argument-drift workflow consumes third-party `uutils/coreutils` source, regenerates Rust from `uu_app()`, then builds/tests bashkit. If the generator preserved arbitrary upstream statements and the same job held `contents: write`/`pull-requests: write`, malicious upstream code could execute during parser construction with repository write impact. | Two-layer fix: (1) `bashkit-coreutils-port` validates args-mode `uu_app()` before emission and accepts only (a) a single tail clap `Command` builder expression or (b) `let <ident> = Command::new(...)<chain>; <ident>.<method>(...)<chain>` where the initializer is itself a Command::new chain and the tail's innermost receiver is the let-bound ident; both shapes run through the same disallowed-method/closure/macro/block visitor. Regression tests `rejects_executable_statements_in_uu_app`, `rejects_let_with_non_command_initializer`, `rejects_tail_not_chained_off_let_binding`, and `rejects_three_statement_body` prove write+abort, hijacked initializers, mismatched tails, and inserted statements all fail codegen. (2) `.github/workflows/coreutils-args-drift.yml` separates privilege: regeneration/build/test has only `contents: read` and `persist-credentials: false` on both checkouts; the later `open-pr` job gets write permission but does not build or execute generated Rust, only commits the tested artifact. | **FIXED** |

**TM-INF-022**: Generalizes TM-INF-016 to cover the whole builtin surface.
The originating bug was `builtins/jq/` formatting jaq's compile/parse
Expand Down
Loading