NOTE(review): this patch was reconstructed from a copy whose line breaks and
indentation had been collapsed and whose `<...>` spans had been stripped.
Line splits were checked against every hunk's line counts. Indentation, the
generic arguments (`Option<String>`, `Vec<PathBuf>`, `BTreeMap<String, String>`)
and the `<agent>` placeholder are inferred. Confirm them against the tree (or
apply with `git apply --ignore-whitespace`) before relying on context matching.

diff --git a/containers/agentcap-goose/agentcap-init.sh b/containers/agentcap-goose/agentcap-init.sh
index d288a39..570d967 100644
--- a/containers/agentcap-goose/agentcap-init.sh
+++ b/containers/agentcap-goose/agentcap-init.sh
@@ -33,6 +33,12 @@ if [ -n "${AGENTCAP_SKILLS_DIR:-}" ] && [ -d "$AGENTCAP_SKILLS_DIR" ]; then
     ln -sfn "$AGENTCAP_SKILLS_DIR/skills" "$PWD/skills"
 fi
 
+# Toolchain mount (agentcap --tool-dir): prepend its bin/ so the agent's task
+# work can call it. The dir is bind-mounted (read-only) at its host path.
+if [ -n "${AGENTCAP_TOOL_BIN:-}" ] && [ -d "$AGENTCAP_TOOL_BIN" ]; then
+    export PATH="$AGENTCAP_TOOL_BIN:$PATH"
+fi
+
 # Record this shell's PID so the sandbox can target the about-to-be
 # exec'd agent precisely on timeout. ``exec`` keeps $$.
 echo $$ > /tmp/agentcap-current.pid
diff --git a/containers/agentcap-hermes/agentcap-init.sh b/containers/agentcap-hermes/agentcap-init.sh
index f4a043c..dc404bb 100644
--- a/containers/agentcap-hermes/agentcap-init.sh
+++ b/containers/agentcap-hermes/agentcap-init.sh
@@ -66,6 +66,12 @@ if [ -n "${AGENTCAP_SKILLS_DIR:-}" ] && [ -d "$AGENTCAP_SKILLS_DIR" ]; then
     ln -sf "$AGENTCAP_SKILLS_DIR/agents/AGENTS.md" "$PWD/AGENTS.md"
 fi
 
+# Toolchain mount (agentcap --tool-dir): prepend its bin/ so the agent's task
+# work can call it. The dir is bind-mounted (read-only) at its host path.
+if [ -n "${AGENTCAP_TOOL_BIN:-}" ] && [ -d "$AGENTCAP_TOOL_BIN" ]; then
+    export PATH="$AGENTCAP_TOOL_BIN:$PATH"
+fi
+
 # Record this shell's PID so the sandbox can target the about-to-be
 # exec'd agent precisely on timeout. ``exec`` keeps $$, so the value
 # stays valid after the replacement.
diff --git a/containers/agentcap-opencode/agentcap-init.sh b/containers/agentcap-opencode/agentcap-init.sh
index e6d1d0b..212da4c 100644
--- a/containers/agentcap-opencode/agentcap-init.sh
+++ b/containers/agentcap-opencode/agentcap-init.sh
@@ -38,6 +38,12 @@ if [ -n "${AGENTCAP_SKILLS_DIR:-}" ] && [ -d "$AGENTCAP_SKILLS_DIR" ]; then
     ln -sfn "$AGENTCAP_SKILLS_DIR/skills" "$PWD/skills"
 fi
 
+# Toolchain mount (agentcap --tool-dir): prepend its bin/ so the agent's task
+# work can call it. The dir is bind-mounted (read-only) at its host path.
+if [ -n "${AGENTCAP_TOOL_BIN:-}" ] && [ -d "$AGENTCAP_TOOL_BIN" ]; then
+    export PATH="$AGENTCAP_TOOL_BIN:$PATH"
+fi
+
 # Record this shell's PID so the sandbox can target the about-to-be
 # exec'd agent precisely on timeout. ``exec`` keeps $$.
 echo $$ > /tmp/agentcap-current.pid
diff --git a/containers/agentcap-pi/agentcap-init.sh b/containers/agentcap-pi/agentcap-init.sh
index 3b46696..396fcfd 100644
--- a/containers/agentcap-pi/agentcap-init.sh
+++ b/containers/agentcap-pi/agentcap-init.sh
@@ -35,6 +35,12 @@ if [ -n "${AGENTCAP_SKILLS_DIR:-}" ] && [ -d "$AGENTCAP_SKILLS_DIR" ]; then
     ln -sfn "$AGENTCAP_SKILLS_DIR/skills" "$PWD/skills"
 fi
 
+# Toolchain mount (agentcap --tool-dir): prepend its bin/ so the agent's task
+# work can call it. The dir is bind-mounted (read-only) at its host path.
+if [ -n "${AGENTCAP_TOOL_BIN:-}" ] && [ -d "$AGENTCAP_TOOL_BIN" ]; then
+    export PATH="$AGENTCAP_TOOL_BIN:$PATH"
+fi
+
 # Record this shell's PID so the sandbox can target the about-to-be
 # exec'd agent precisely on timeout. ``exec`` keeps $$.
 echo $$ > /tmp/agentcap-current.pid
diff --git a/src/main.rs b/src/main.rs
index 236700c..5d750b2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -34,6 +34,10 @@ enum Cmd {
         /// Host dir with a huggingface/skills checkout (bind-mounted read-only).
         #[arg(long)]
         skills: Option<String>,
+        /// Host dir of a self-contained toolchain (e.g. a relocatable venv);
+        /// bind-mounted read-only and its `bin/` prepended to the agent's PATH.
+        #[arg(long)]
+        tool_dir: Option<String>,
         /// Plain-text file: one prompt per line (# comments + blanks ignored).
         #[arg(long)]
         tasks: String,
@@ -87,12 +91,13 @@ fn main() -> Result<()> {
             api_key,
             sandbox,
             skills,
+            tool_dir,
             tasks,
             turns,
             followup,
             timeout,
         } => agentcap::run::run(
-            agent, model, upstream, api_key, sandbox, skills, tasks, turns, followup, timeout,
+            agent, model, upstream, api_key, sandbox, skills, tool_dir, tasks, turns, followup, timeout,
         ),
         Cmd::Ls { workspace, long } => agentcap::ls::run(workspace, long),
         Cmd::Export {
diff --git a/src/run.rs b/src/run.rs
index c4febfb..396f684 100644
--- a/src/run.rs
+++ b/src/run.rs
@@ -25,6 +25,7 @@ pub fn run(
     api_key: Option<String>,
     sandbox_dir: Option<String>,
     skills_dir: Option<String>,
+    tool_dir: Option<String>,
     tasks_file: String,
     turns: i64,
     followup: String,
@@ -103,6 +104,12 @@ pub fn run(
         env.insert("AGENTCAP_SKILLS_DIR".into(), skills_abs.clone());
         readonly.push(PathBuf::from(skills_abs));
     }
+    // A self-contained toolchain dir, mounted read-only at its host path.
+    if let Some(t) = &tool_dir {
+        let (tool_bin, mount) = tool_dir_wiring(t);
+        env.insert("AGENTCAP_TOOL_BIN".into(), tool_bin);
+        readonly.push(mount);
+    }
     let writable: Vec<PathBuf> = vec![
         PathBuf::from(abs(&traces)),
         PathBuf::from(abs(&state)),
@@ -166,6 +173,16 @@ fn abs(p: &Path) -> String {
         .into_owned()
 }
 
+/// Sandbox wiring for `--tool-dir`: the `AGENTCAP_TOOL_BIN` value (the bundle's
+/// `bin/`) and the read-only mount. The mount is the bundle *root*, not `bin/`,
+/// so the interpreter and libs that `bin/` shebangs into come too; both are
+/// absolute so the src==dst bind keeps those shebangs valid in-container.
+fn tool_dir_wiring(tool_dir: &str) -> (String, PathBuf) {
+    let root = abs(Path::new(tool_dir));
+    let bin = abs(&Path::new(&root).join("bin"));
+    (bin, PathBuf::from(root))
+}
+
 fn is_hf_router(upstream: &str) -> bool {
     url::Url::parse(upstream)
         .ok()
@@ -235,3 +252,25 @@ fn write_run_json(
     std::fs::write(workdir.join("run.json"), serde_json::to_string_pretty(&summary)?)?;
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn tool_dir_wiring_points_at_bin_and_mounts_root() {
+        let (bin, mount) = tool_dir_wiring("/opt/toolbox");
+        // bin/ is on PATH; the whole bundle (interpreter + libs, not just bin/) is mounted.
+        assert_eq!(bin, "/opt/toolbox/bin");
+        assert_eq!(mount, PathBuf::from("/opt/toolbox"));
+    }
+
+    #[test]
+    fn tool_dir_wiring_absolutizes_relative_paths() {
+        // Relocatable src==dst mount needs absolute paths even for a relative arg.
+        let (bin, mount) = tool_dir_wiring("toolbox");
+        assert!(mount.is_absolute(), "mount not absolute: {mount:?}");
+        assert!(Path::new(&bin).is_absolute(), "bin not absolute: {bin}");
+        assert!(bin.ends_with("toolbox/bin"), "bin not under toolbox/bin: {bin}");
+    }
+}
diff --git a/tests/live.rs b/tests/live.rs
index c965830..c163fe8 100644
--- a/tests/live.rs
+++ b/tests/live.rs
@@ -9,6 +9,7 @@
 //! AGENTCAP_TEST_LLM_URL=http://127.0.0.1:8000 cargo test --test live -- --ignored
 //! Each test skips (passes) if no server is reachable.
 
+use std::collections::BTreeMap;
 use std::process::Command;
 use std::time::Duration;
 
@@ -40,6 +41,14 @@ fn upstream() -> Option<String> {
     None
 }
 
+/// Is `podman` on PATH? The tool-dir test needs only podman (no model server),
+/// so it gates on this rather than [`upstream`].
+fn podman_available() -> bool {
+    std::env::var_os("PATH")
+        .map(|path| std::env::split_paths(&path).any(|d| d.join("podman").is_file()))
+        .unwrap_or(false)
+}
+
 /// Last `n` chars of `s`, for failure dumps.
 fn tail(s: &str, n: usize) -> String {
     let start = s.char_indices().rev().take(n).last().map(|(i, _)| i).unwrap_or(0);
@@ -72,6 +81,10 @@ fn diagnostics(run_dir: &std::path::Path, summary: &Value, bin_stderr: &[u8]) ->
 /// `agentcap run --agent <agent>` against the live server; assert the run dir,
 /// run.json shape, captures, and (for pi) the streamed JSONL trace.
 fn run_agent(agent: &str, expect_jsonl_traces: bool) {
+    if !podman_available() {
+        eprintln!("skip live[{agent}]: no podman on PATH");
+        return;
+    }
     let Some(upstream) = upstream() else {
         eprintln!("skip live[{agent}]: no llama server (set AGENTCAP_TEST_LLM_URL or run one on :8000/:8080)");
         return;
@@ -168,6 +181,66 @@ fn live_goose() {
     run_agent("goose", false);
 }
 
+/// `--tool-dir`: the bundle mounts read-only and its `bin/` lands on the agent's
+/// PATH. Driven straight through the sandbox (no model) so the mount + init-script
+/// wiring is asserted deterministically; `run()`'s derivation of this env from
+/// `--tool-dir` is unit-tested in `run.rs`. Needs only podman (any agent image —
+/// the tool-dir init block is identical across all four).
+#[test]
+#[ignore = "live: needs podman + a built per-agent image"]
+fn live_tool_dir_mount() {
+    if !podman_available() {
+        eprintln!("skip live[tool-dir]: no podman on PATH");
+        return;
+    }
+
+    // A self-contained bundle: bin/greet prints a sentinel.
+    let tmp = tempfile::tempdir().unwrap();
+    let bundle = tmp.path().join("toolbox");
+    std::fs::create_dir_all(bundle.join("bin")).unwrap();
+    let greet = bundle.join("bin/greet");
+    std::fs::write(&greet, "#!/bin/sh\necho TOOLDIR_SENTINEL_OK\n").unwrap();
+    {
+        use std::os::unix::fs::PermissionsExt;
+        std::fs::set_permissions(&greet, std::fs::Permissions::from_mode(0o755)).unwrap();
+    }
+
+    // The env run() derives for this bundle: bin/ on AGENTCAP_TOOL_BIN, the whole
+    // bundle mounted read-only at its host path.
+    let env: BTreeMap<String, String> = BTreeMap::from([(
+        "AGENTCAP_TOOL_BIN".to_string(),
+        bundle.join("bin").to_string_lossy().into_owned(),
+    )]);
+    let sandbox = agentcap::sandbox::require_sandbox("pi", env.clone(), vec![bundle.clone()], vec![], &|m| {
+        eprintln!(" [sandbox] {m}")
+    })
+    .expect("provision pi sandbox");
+
+    let probe = ["sh", "-c", "command -v greet; greet"].map(String::from);
+    let out = sandbox
+        .run(&probe, &env, None, Some(Duration::from_secs(600)))
+        .unwrap_or_else(|e| match e {
+            agentcap::sandbox::SandboxError::Timeout => panic!("tool-dir probe timed out"),
+            agentcap::sandbox::SandboxError::Other(e) => panic!("tool-dir probe errored: {e}"),
+        });
+
+    assert_eq!(
+        out.code, 0,
+        "probe rc={}\n--- stdout ---\n{}\n--- stderr ---\n{}",
+        out.code, out.stdout, out.stderr
+    );
+    assert!(
+        out.stdout.contains("/bin/greet"),
+        "bundle bin/ not on PATH\nstdout:\n{}",
+        out.stdout
+    );
+    assert!(
+        out.stdout.contains("TOOLDIR_SENTINEL_OK"),
+        "mounted tool not runnable\nstdout:\n{}",
+        out.stdout
+    );
+}
+
 // hermes and opencode are intentionally omitted — neither runs via `agentcap run`
 // on the tiny CI model:
 // - hermes: its base system prompt (~3.9k tokens) exceeds the budget on