diff --git a/crates/bashkit/src/builtins/jq/args.rs b/crates/bashkit/src/builtins/jq/args.rs
index eae33919..dfb5d34f 100644
--- a/crates/bashkit/src/builtins/jq/args.rs
+++ b/crates/bashkit/src/builtins/jq/args.rs
@@ -19,6 +19,18 @@ use super::format::Indent;
 /// jq has no documented cap; we apply one defensively to keep memory bounded.
 pub(super) const MAX_ARGS_POSITIONAL: usize = 4096;
 
+/// Maximum number of `--rawfile` / `--slurpfile` bindings accepted per call.
+/// Each binding retains a full file's contents in jq globals and `$ARGS.named`,
+/// so this cap is kept far below `MAX_ARGS_POSITIONAL` (4096).
+// THREAT[TM-DOS-062]: Count and byte caps stop jq file bindings from
+// multiplying one bounded VFS file into unbounded in-process global state.
+pub(super) const MAX_FILE_VAR_REQUESTS: usize = 128;
+
+/// Maximum cumulative bytes (16 MiB) read through `--rawfile` / `--slurpfile`.
+/// Every binding is charged separately, so repeated references to the same VFS
+/// file cannot multiply one file into unbounded jq global state.
+pub(super) const MAX_FILE_VAR_BYTES: usize = 16 * 1024 * 1024;
+
 /// Parsed jq invocation. Fields mirror the documented jq options modulo
 /// the few we explicitly do not implement (`--seq`, `--stream`, color flags).
pub(super) struct JqArgs<'a> {
@@ -258,6 +270,11 @@ pub(super) fn parse<'a>(args: &'a [String]) -> ParseOutcome<'a> {
             },
             "--slurpfile" | "--rawfile" => match (args.get(i + 1), args.get(i + 2)) {
                 (Some(name), Some(path)) => {
+                    if out.file_var_requests.len() >= MAX_FILE_VAR_REQUESTS {
+                        return ParseOutcome::Done(usage_error(format!(
+                            "jq: too many file bindings (max {MAX_FILE_VAR_REQUESTS})"
+                        )));
+                    }
                     let kind = if arg == "--slurpfile" {
                         FileVarKind::Slurp
                     } else {
@@ -533,6 +550,28 @@ mod tests {
         }
     }
 
+    #[test]
+    fn file_bindings_count_is_capped() {
+        // One binding more than the cap, followed by a trivial `-n .` program.
+        let mut args = Vec::new();
+        for i in 0..=MAX_FILE_VAR_REQUESTS {
+            args.push("--rawfile".to_string());
+            args.push(format!("x{i}"));
+            args.push("/x.txt".to_string());
+        }
+        args.push("-n".to_string());
+        args.push(".".to_string());
+
+        // `parse` only borrows its input, so the vector can be passed directly.
+        match parse(&args) {
+            ParseOutcome::Done(r) => {
+                assert_eq!(r.exit_code, 2);
+                assert!(r.stderr.contains("too many file bindings"));
+            }
+            _ => panic!("expected Done"),
+        }
+    }
+
     #[test]
     fn args_strings_become_positional() {
         match parse_strs(&["-n", ".", "--args", "a", "b", "c"]) {
diff --git a/crates/bashkit/src/builtins/jq/mod.rs b/crates/bashkit/src/builtins/jq/mod.rs
index 70bedf79..92da84ee 100644
--- a/crates/bashkit/src/builtins/jq/mod.rs
+++ b/crates/bashkit/src/builtins/jq/mod.rs
@@ -38,7 +38,7 @@ mod regex_compat;
 #[cfg(test)]
 mod tests;
 
-use args::{FileVarKind, JqArgs, ParseOutcome};
+use args::{FileVarKind, JqArgs, MAX_FILE_VAR_BYTES, ParseOutcome};
 use compat::{
     ARGS_VAR_NAME, ENV_VAR_NAME, FILENAME_VAR_NAME, LINENO_VAR_NAME, PUBLIC_ENV_VAR_NAME,
     build_compat_prefix,
@@ -102,12 +102,28 @@ async fn run_jq(ctx: Context<'_>, parsed: JqArgs<'_>) -> Result {
     // so we can fail fast on missing files.
let mut all_var_bindings = parsed.var_bindings.clone();
     let mut all_named_args = parsed.named_args.clone();
+    let mut file_binding_bytes = 0usize;
     for req in &parsed.file_var_requests {
         let path = resolve_path(ctx.cwd, req.path);
+        if let Ok(meta) = ctx.fs.stat(&path).await
+            && meta.file_type.is_file()
+            && file_binding_exceeds_limit(file_binding_bytes, meta.size)
+        {
+            return Ok(file_binding_limit_error(file_binding_bytes, meta.size));
+        }
         let text = match read_text_file(&*ctx.fs, &path, "jq").await {
             Ok(t) => t,
             Err(e) => return Ok(e),
         };
+        match file_binding_bytes.checked_add(text.len()) {
+            Some(total) if total <= MAX_FILE_VAR_BYTES => file_binding_bytes = total,
+            _ => {
+                return Ok(file_binding_limit_error(
+                    file_binding_bytes,
+                    text.len() as u64,
+                ));
+            }
+        }
         let value = match req.kind {
             FileVarKind::Raw => serde_json::Value::String(text),
             FileVarKind::Slurp => match parse_json_stream(&text) {
@@ -413,6 +429,29 @@ async fn run_jq(ctx: Context<'_>, parsed: JqArgs<'_>) -> Result {
     Ok(ExecResult::ok(output))
 }
 
+/// Reports whether binding `next` more bytes on top of `used` would exceed
+/// `MAX_FILE_VAR_BYTES`. Sizes that do not fit in `usize`, or sums that
+/// overflow, are treated as exceeding the cap.
+fn file_binding_exceeds_limit(used: usize, next: u64) -> bool {
+    let Ok(incoming) = usize::try_from(next) else {
+        return true;
+    };
+    match used.checked_add(incoming) {
+        Some(total) => total > MAX_FILE_VAR_BYTES,
+        None => true,
+    }
+}
+
+/// Builds the error result (code 2) reported when file bindings go over
+/// `MAX_FILE_VAR_BYTES`; `used` and `next` are echoed in the message.
+fn file_binding_limit_error(used: usize, next: u64) -> ExecResult {
+    let message = format!(
+        "jq: file bindings exceed {} bytes (used {used}, next {next})\n",
+        MAX_FILE_VAR_BYTES
+    );
+    ExecResult::err(message, 2)
+}
+
 /// `--rawfile`/`--slurpfile`/$ARGS plumbing helper. The serialized object is
 /// `{"positional": [...], "named": {...}}`.
fn build_args_obj(
diff --git a/crates/bashkit/src/builtins/jq/tests.rs b/crates/bashkit/src/builtins/jq/tests.rs
index d2f7bf91..f126099d 100644
--- a/crates/bashkit/src/builtins/jq/tests.rs
+++ b/crates/bashkit/src/builtins/jq/tests.rs
@@ -375,6 +375,24 @@ async fn rawfile_with_raw_output_emits_unquoted() {
     assert_eq!(result.stdout.trim(), "hello");
 }
 
+#[tokio::test]
+async fn repeated_rawfile_bindings_are_byte_capped() {
+    // 32 bindings of the same 1 MiB file, followed by a trivial `-n .` program.
+    let one_mib = "x".repeat(1024 * 1024);
+    let owned: Vec<String> = (0..32)
+        .flat_map(|i| ["--rawfile".to_string(), format!("x{i}"), "/big.txt".to_string()])
+        .chain(["-n".to_string(), ".".to_string()])
+        .collect();
+    let argv: Vec<&str> = owned.iter().map(String::as_str).collect();
+
+    let outcome = run_jq_with_files(&argv, &[("/big.txt", &one_mib)])
+        .await
+        .unwrap();
+
+    assert_eq!(outcome.exit_code, 2);
+    assert!(outcome.stderr.contains("file bindings exceed"));
+}
+
 // =========================================================================
 // Tier 2: --args / --jsonargs / $ARGS
 // =========================================================================
diff --git a/specs/threat-model.md b/specs/threat-model.md
index 406018d4..be5657b6 100644
--- a/specs/threat-model.md
+++ b/specs/threat-model.md
@@ -285,6 +285,7 @@ max_ast_depth: 100, // Parser recursion (TM-DOS-022)
 | TM-DOS-059 | Parameter expansion replacement bomb | `${x//a/$(printf 'b%.0s' {1..1000})}` on large `x` amplifies output multiplicatively (10K × 1K = 10MB) | `max_total_variable_bytes` + `max_stdout_bytes` | **MITIGATED** |
 | TM-DOS-060 | Sparse array huge-index allocation | `arr[999999999]=x` could allocate ~1B empty slots if arrays are Vec-backed; negative indices could cause OOB | HashMap-based arrays; `max_array_entries` caps total entries | **MITIGATED** |
 | TM-DOS-061 | Snapshot function restore bypasses parser/function limits | Crafted snapshot restores prebuilt or deeply nested functions that exceed the current tenant's
parser depth or function memory budget | Re-parse restored function source with current `ExecutionLimits`; re-apply function memory budget before insertion | **MITIGATED** |
+| TM-DOS-062 | jq file binding amplification | Repeated `--rawfile` / `--slurpfile` bindings to one max-sized VFS file multiply retained jq globals and `$ARGS.named` values without consuming more VFS quota | `MAX_FILE_VAR_REQUESTS` caps binding count; `MAX_FILE_VAR_BYTES` counts cumulative file bytes per binding before retaining globals | **MITIGATED** |
 
 **TM-DOS-051** (mitigated): `builtins/yaml.rs` — `parse_yaml_block`, `parse_yaml_map`,
 `parse_yaml_list` carry a `depth: usize` parameter. When `depth > MAX_YAML_DEPTH = 100`,
@@ -1446,6 +1447,7 @@ This section maps former vulnerability IDs to the new threat ID scheme and track
 | Dotenv internal variable guard | TM-INJ-018 | `is_internal_variable()` check in `Dotenv::execute` (`builtins/dotenv.rs:138`) | **MITIGATED** |
 | Session-level cumulative counters | TM-ISO-005 | `SessionLimits` caps cumulative commands and `exec()` calls across the lifetime of a `Bash` instance | **MITIGATED** |
 | Per-instance memory budget | TM-ISO-006 | `MemoryLimits` capping variable count, total bytes, array entries, function count, function body bytes | **MITIGATED** |
+| jq file binding amplification | TM-DOS-062 | `MAX_FILE_VAR_REQUESTS` and `MAX_FILE_VAR_BYTES` (`builtins/jq/args.rs`) bound `--rawfile` / `--slurpfile` globals | **MITIGATED** |
 
 ---
 
@@ -1468,6 +1470,7 @@ ExecutionLimits::new()
 // Note: MAX_ARITHMETIC_DEPTH (50) is a compile-time constant in interpreter (TM-DOS-026)
 // Note: MAX_AWK_PARSER_DEPTH (100) is a compile-time constant in builtins/awk.rs (TM-DOS-027)
 // Note: MAX_JQ_JSON_DEPTH (100) is a compile-time constant in builtins/jq/ (TM-DOS-027)
+// Note: MAX_FILE_VAR_REQUESTS (128) and MAX_FILE_VAR_BYTES (16 MiB) are compile-time constants in builtins/jq/args.rs capping jq file bindings (TM-DOS-062)
 
 // Path validation limits (applied via FsLimits):
 FsLimits::new()