everruns · chaliy · May 11, 2026
diff --git a/crates/bashkit-coreutils-port/src/args.rs b/crates/bashkit-coreutils-port/src/args.rs
@@ -643,30 +643,122 @@ fn collect_option_constants(file: &syn::File) -> Vec<Item> {
 }
 
 // THREAT[TM-INF-025]: args-mode consumes third-party Rust from uutils.
-// Only a single clap builder expression may become executable bashkit
-// code; prefix statements would run during CI/parser construction.
+// Only clap builder expressions may become executable bashkit code;
+// any other statement shape would run during CI/parser construction.
+//
+// Two accepted body shapes:
+//   1. `<Command::new(...) chain>`  (single tail expression)
+//   2. `let <ident> = <Command::new(...) chain>;
+//       <ident>.<builder method>(...)...`
+//      — equivalent to a single chain split across a binding. The
+//      tail's innermost receiver must be the let-bound identifier;
+//      both halves run through the same chain validator.
 fn validate_uu_app_body(uu_app: &ItemFn) -> Result<()> {
-    let command_expr = match uu_app.block.stmts.as_slice() {
-        [Stmt::Expr(expr, None)] => expr,
+    match uu_app.block.stmts.as_slice() {
+        [Stmt::Expr(expr, None)] => validate_command_chain_expr(expr),
+        [Stmt::Local(local), Stmt::Expr(tail_expr, None)] => {
+            validate_let_bound_command_body(local, tail_expr)
+        }
         stmts => bail!(
-            "unsafe uu_app body: expected a single clap Command builder expression, found {} statements",
+            "unsafe uu_app body: expected a single clap Command builder expression \
+             or `let <ident> = Command::new(...)...; <ident>.method(...)...`, \
+             found {} statements",
             stmts.len()
         ),
-    };
+    }
+}
 
+fn validate_command_chain_expr(expr: &Expr) -> Result<()> {
     let mut validator = UuAppExprValidator { errors: vec![] };
-    syn::visit::Visit::visit_expr(&mut validator, command_expr);
+    syn::visit::Visit::visit_expr(&mut validator, expr);
     if !validator.errors.is_empty() {
         bail!("unsafe uu_app body: {}", validator.errors.join("; "));
     }
-    if !chain_roots_at_command_new(command_expr) {
+    if !chain_roots_at_command_new(expr) {
         bail!(
             "unsafe uu_app body: tail expression must be a clap::Command::new(...) builder chain"
         );
     }
     Ok(())
 }
 
+fn validate_let_bound_command_body(local: &syn::Local, tail_expr: &Expr) -> Result<()> {
+    let binding_ident = simple_let_ident(local).ok_or_else(|| {
+        anyhow!(
+            "unsafe uu_app body: let binding must be `let <ident> = ...` \
+             (no destructuring, no let-else)"
+        )
+    })?;
+    let init = local
+        .init
+        .as_ref()
+        .ok_or_else(|| anyhow!("unsafe uu_app body: let binding must have an initializer"))?;
+    if init.diverge.is_some() {
+        bail!("unsafe uu_app body: let-else is not allowed");
+    }
+    // The bound expression must itself be a Command::new(...) builder
+    // chain so the only thing the binding can hold is a clap Command.
+    validate_command_chain_expr(&init.expr)?;
+
+    // The tail must be a method chain whose innermost receiver is the
+    // let-bound identifier — i.e. structurally a continuation of the
+    // builder chain.
+    let mut validator = UuAppExprValidator { errors: vec![] };
+    syn::visit::Visit::visit_expr(&mut validator, tail_expr);
+    if !validator.errors.is_empty() {
+        bail!("unsafe uu_app body: {}", validator.errors.join("; "));
+    }
+    if !tail_chains_back_to_ident(tail_expr, binding_ident) {
+        bail!(
+            "unsafe uu_app body: tail expression must be a `{}.method(...)...` \
+             chain on the let binding",
+            binding_ident
+        );
+    }
+    Ok(())
+}
+
+/// Returns the bound identifier of a plain `let <ident> [: T] = ...;`.
+/// Rejects destructuring, `ref`, subpatterns, or attributes — anything
+/// that could hide an active expression inside the binding pattern.
+fn simple_let_ident(local: &syn::Local) -> Option<&syn::Ident> {
+    let pat = match &local.pat {
+        syn::Pat::Type(pt) => &*pt.pat,
+        other => other,
+    };
+    match pat {
+        syn::Pat::Ident(pi)
+            if pi.attrs.is_empty() && pi.by_ref.is_none() && pi.subpat.is_none() =>
+        {
+            Some(&pi.ident)
+        }
+        _ => None,
+    }
+}
+
+/// Walk down method-call receivers and require the innermost expression
+/// to be a bare reference to `target` — i.e. the tail is shaped like
+/// `target.foo(...).bar(...)`. Anything else (a fresh `Command::new`,
+/// a different binding, a function call) is rejected.
+fn tail_chains_back_to_ident(expr: &Expr, target: &syn::Ident) -> bool {
+    let mut cur = expr;
+    loop {
+        match cur {
+            Expr::MethodCall(mc) => cur = &mc.receiver,
+            Expr::Path(p)
+                if p.qself.is_none()
+                    && p.path.leading_colon.is_none()
+                    && p.path.segments.len() == 1
+                    && p.path.segments[0].ident == *target
+                    && matches!(p.path.segments[0].arguments, syn::PathArguments::None) =>
+            {
+                return true;
+            }
+            _ => return false,
+        }
+    }
+}
+
 struct UuAppExprValidator {
     errors: Vec<String>,
 }
@@ -1203,4 +1295,116 @@ pub fn uu_app() -> clap::Command {
         let msg = format!("{err:#}");
         assert!(msg.contains("Command::new"), "got: {msg}");
     }
+
+    #[test]
+    fn accepts_let_bound_command_with_tail_chain() {
+        // Upstream uutils truncate now splits uu_app() into
+        // `let cmd = Command::new(...)...;
+        //  configure_localized_command(cmd).arg(...)...`.
+        // The Rewriter folds `configure_localized_command(cmd)` to `cmd`,
+        // leaving exactly the `let`-then-tail shape the validator must accept.
+        let (_tmp, uutils) = fixture(&[
+            (
+                "src/uu/cat/src/cat.rs",
+                r#"
+mod options {
+    pub static FILE: &str = "file";
+}
+
+pub fn uu_app() -> clap::Command {
+    let cmd = Command::new("cat")
+        .version(uucore::crate_version!())
+        .infer_long_args(true);
+    uucore::clap_localization::configure_localized_command(cmd)
+        .arg(Arg::new(options::FILE))
+}
+"#,
+            ),
+            ("src/uu/cat/locales/en-US.ftl", ""),
+        ]);
+
+        let body = run(&uutils, "cat", "poc").unwrap();
+        assert!(body.contains("pub fn cat_command() -> clap::Command"));
+        assert!(body.contains("Command::new(\"cat\")"));
+        // The configure_localized_command(cmd) wrapper is folded away;
+        // only `cmd.arg(...)` remains.
+        assert!(
+            !body.contains("configure_localized_command"),
+            "wrapper should be stripped: {body}"
+        );
+        assert!(body.contains("cmd.arg("), "got: {body}");
+    }
+
+    #[test]
+    fn rejects_let_with_non_command_initializer() {
+        let (_tmp, uutils) = fixture(&[
+            (
+                "src/uu/cat/src/cat.rs",
+                r#"
+mod options {
+    pub static FILE: &str = "file";
+}
+
+pub fn uu_app() -> clap::Command {
+    let cmd = std::process::Command::new("sh").arg("-c").arg("nope").status().unwrap();
+    Command::new("cat").arg(Arg::new(options::FILE))
+}
+"#,
+            ),
+            ("src/uu/cat/locales/en-US.ftl", ""),
+        ]);
+
+        let err = run(&uutils, "cat", "poc").unwrap_err();
+        let msg = format!("{err:#}");
+        assert!(msg.contains("unsafe uu_app"), "got: {msg}");
+    }
+
+    #[test]
+    fn rejects_tail_not_chained_off_let_binding() {
+        let (_tmp, uutils) = fixture(&[
+            (
+                "src/uu/cat/src/cat.rs",
+                r#"
+mod options {
+    pub static FILE: &str = "file";
+}
+
+pub fn uu_app() -> clap::Command {
+    let cmd = Command::new("cat");
+    Command::new("cat").arg(Arg::new(options::FILE))
+}
+"#,
+            ),
+            ("src/uu/cat/locales/en-US.ftl", ""),
+        ]);
+
+        let err = run(&uutils, "cat", "poc").unwrap_err();
+        let msg = format!("{err:#}");
+        assert!(msg.contains("chain on the let binding"), "got: {msg}");
+    }
+
+    #[test]
+    fn rejects_three_statement_body() {
+        let (_tmp, uutils) = fixture(&[
+            (
+                "src/uu/cat/src/cat.rs",
+                r#"
+mod options {
+    pub static FILE: &str = "file";
+}
+
+pub fn uu_app() -> clap::Command {
+    let cmd = Command::new("cat");
+    std::process::abort();
+    cmd.arg(Arg::new(options::FILE))
+}
+"#,
+            ),
+            ("src/uu/cat/locales/en-US.ftl", ""),
+        ]);
+
+        let err = run(&uutils, "cat", "poc").unwrap_err();
+        let msg = format!("{err:#}");
+        assert!(msg.contains("found 3 statements"), "got: {msg}");
+    }
 }
diff --git a/specs/coreutils-args-port.md b/specs/coreutils-args-port.md
@@ -45,11 +45,17 @@ codegen**, not by depending on `uu_*` crates at runtime.
      through the shim — if it does, every uutils env-default
      auto-lights as that option's bashkit support lands.
 5. Validates the rewritten `uu_app()` before emission: args mode accepts
-   only a single tail clap `Command` builder expression. Prefix
-   statements, block expressions, loops, matches, async blocks, and
-   unsafe blocks are rejected before any generated Rust is written.
-   This keeps third-party uutils source from smuggling arbitrary
-   executable statements into `<util>_command()` (TM-INF-025).
+   either a single tail clap `Command` builder expression, or the
+   two-statement shape `let <ident> = Command::new(...)<chain>;
+   <ident>.<method>(...)<chain>` where the `let` initializer is itself
+   a Command::new chain and the tail's innermost receiver is the
+   let-bound identifier (the same shape that emerges after the Rewriter
+   folds `uucore::clap_localization::configure_localized_command(cmd)`
+   to `cmd`). Anything else — additional prefix statements, block
+   expressions, loops, matches, async blocks, unsafe blocks, or a
+   destructuring/let-else binding — is rejected before any generated
+   Rust is written. This keeps third-party uutils source from smuggling
+   arbitrary executable statements into `<util>_command()` (TM-INF-025).
 6. Emits a generated file under
    `crates/bashkit/src/builtins/generated/<util>_args.rs` with a clean
    `pub fn <util>_command() -> clap::Command`.

diff --git a/specs/threat-model.md b/specs/threat-model.md
@@ -440,7 +440,7 @@ All execution stays within the virtual interpreter — no OS subprocess is spawn
 | TM-INF-022 | Library Debug shapes leak via stderr | Builtins wrapping external libs (jaq, regex, serde_json, semver, chrono, …) format errors with `format!("{:?}", e)` and dump internal struct shapes into stderr — e.g. jq printed `(File { code: "<800 chars of prepended compat-defs source>", path: () }, [("@tsv", Filter(0))])` to LLM agents | Three-layer enforcement, all run by `cargo test`: (1) static — `builtins::tests::no_debug_fmt_in_builtin_source` walks every `crates/bashkit/src/builtins/*.rs` and forbids `{:?}` / `{:#?}` / `{name:?}` (per-line opt-out: `// debug-ok: <reason>`); (2) dynamic per-tool — each tool's `mod tests` calls `bashkit::testing::assert_no_leak` with malformed inputs; (3) fuzz — every cargo-fuzz target plus 5 proptest cases call `bashkit::testing::assert_fuzz_invariants` against random input and check the same invariants on stderr/stdout | **FIXED** |
 | TM-INF-023 | jq `halt` / `halt_error` terminate the host process | jaq-std 3.0's `halt(N)` native calls `std::process::exit(exit_code)`. `defs.jq` wraps it with `def halt: halt(0);` and `def halt_error(...): ..., halt(...);`. Untrusted jq filters can invoke `halt` / `halt_error` to exit the entire embedding process, escaping the `ExecResult`-based sandbox boundary and causing process-wide DoS for any caller hosting bashkit. | Strip the upstream `halt` native from `jaq_std::funs::<D>()` and chain in a safe replacement that pops the variable argument and returns `Error::str("halt is disabled in the bashkit sandbox")`. The wrapper defs (`halt`, `halt_error`) still resolve, so callers see a normal jq runtime error (exit 5) instead of the host being killed. Regression tests in `builtins::jq::tests`: `halt_does_not_terminate_host_process`, `halt_with_arg_does_not_terminate_host_process`, `halt_error_does_not_terminate_host_process`. | **FIXED** |
 | TM-INF-024 | Host env side-channel via clap `Arg::env(...)` | Clap-based builtins ported from uutils (currently `ls`, with `.env("TABSIZE")` and `.env("TIME_STYLE")` on `uu_app()`) resolve those defaults from `std::env`, not bashkit's `ctx.env`. This breaks the sandbox boundary two ways: (1) presence-probe — a script can tell whether the host process has `TIME_STYLE`/`TABSIZE` set by observing `ls`'s behaviour; (2) availability — a host- or container-set `TIME_STYLE` materialises as a clap value source for an option bashkit hasn't implemented yet, so the unsupported-option gate fires on every plain `ls` and breaks the builtin for unrelated tenants | Four-layer fix: (1) **codegen strips runtime `.env(...)` and harvests it** — `bashkit-coreutils-port` rewrites `.env("FOO")` chained method calls out of every Arg builder it ports AND emits a sidecar `<UTIL>_ENV_DEFAULTS: &[EnvDefault]` table recording each stripped annotation; (2) **virtual-env shim** — `crate::builtins::clap_env::apply_env_defaults` rewrites argv from `ctx.env` (never `std::env`) before `try_get_matches_from`, emulating clap's "argv > env > default" precedence so uutils' env-default UX is preserved without re-opening the host channel; (3) **static guards** — `no_clap_env_in_generated_parsers` forbids runtime `.env(` calls in `generated/*.rs`, and `every_generated_parser_emits_env_defaults_table` enforces every util emits the sidecar (uniform surface, no per-util conditionals); (4) **defence-in-depth** — workspace `clap` dep drops the `env` cargo feature, so a re-introduced `.env(...)` fails to compile rather than silently re-opening the channel. Coverage: ls runtime tests pin both directions — `ls_ignores_host_time_style_and_tabsize` (host `std::env` ignored) and `ls_honors_virtual_env_time_style` (virtual `ctx.env` honoured) | **FIXED** |
-| TM-INF-025 | Untrusted generated Rust runs in drift CI with repository write token | The coreutils argument-drift workflow consumes third-party `uutils/coreutils` source, regenerates Rust from `uu_app()`, then builds/tests bashkit. If the generator preserved arbitrary upstream statements and the same job held `contents: write`/`pull-requests: write`, malicious upstream code could execute during parser construction with repository write impact. | Two-layer fix: (1) `bashkit-coreutils-port` validates args-mode `uu_app()` before emission and rejects anything other than a single tail clap `Command` builder expression; regression test `rejects_executable_statements_in_uu_app` proves write+abort payloads fail codegen. (2) `.github/workflows/coreutils-args-drift.yml` separates privilege: regeneration/build/test has only `contents: read` and `persist-credentials: false` on both checkouts; the later `open-pr` job gets write permission but does not build or execute generated Rust, only commits the tested artifact. | **FIXED** |
+| TM-INF-025 | Untrusted generated Rust runs in drift CI with repository write token | The coreutils argument-drift workflow consumes third-party `uutils/coreutils` source, regenerates Rust from `uu_app()`, then builds/tests bashkit. If the generator preserved arbitrary upstream statements and the same job held `contents: write`/`pull-requests: write`, malicious upstream code could execute during parser construction with repository write impact. | Two-layer fix: (1) `bashkit-coreutils-port` validates args-mode `uu_app()` before emission and accepts only (a) a single tail clap `Command` builder expression or (b) `let <ident> = Command::new(...)<chain>; <ident>.<method>(...)<chain>` where the initializer is itself a Command::new chain and the tail's innermost receiver is the let-bound ident; both shapes run through the same disallowed-method/closure/macro/block visitor. Regression tests `rejects_executable_statements_in_uu_app`, `rejects_let_with_non_command_initializer`, `rejects_tail_not_chained_off_let_binding`, and `rejects_three_statement_body` prove write+abort, hijacked initializers, mismatched tails, and inserted statements all fail codegen. (2) `.github/workflows/coreutils-args-drift.yml` separates privilege: regeneration/build/test has only `contents: read` and `persist-credentials: false` on both checkouts; the later `open-pr` job gets write permission but does not build or execute generated Rust, only commits the tested artifact. | **FIXED** |
 
 **TM-INF-022**: Generalizes TM-INF-016 to cover the whole builtin surface.
 The originating bug was `builtins/jq/` formatting jaq's compile/parse