Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions containers/agentcap-goose/agentcap-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ if [ -n "${AGENTCAP_SKILLS_DIR:-}" ] && [ -d "$AGENTCAP_SKILLS_DIR" ]; then
ln -sfn "$AGENTCAP_SKILLS_DIR/skills" "$PWD/skills"
fi

# Toolchain mount (agentcap --tool-dir): prepend the bundle's bin/ so the
# agent's task work can call the tools in it. The bundle is bind-mounted
# (read-only) at its host path.
# ${PATH:+:$PATH} appends the previous PATH only when it is set and non-empty.
# A bare "$AGENTCAP_TOOL_BIN:$PATH" would leave a trailing ':' when PATH is
# empty or unset; that empty entry makes the shell search the current directory.
if [ -n "${AGENTCAP_TOOL_BIN:-}" ] && [ -d "$AGENTCAP_TOOL_BIN" ]; then
    export PATH="$AGENTCAP_TOOL_BIN${PATH:+:$PATH}"
fi

# Record this shell's PID so the sandbox can target the about-to-be
# exec'd agent precisely on timeout. ``exec`` keeps $$.
echo $$ > /tmp/agentcap-current.pid
Expand Down
6 changes: 6 additions & 0 deletions containers/agentcap-hermes/agentcap-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ if [ -n "${AGENTCAP_SKILLS_DIR:-}" ] && [ -d "$AGENTCAP_SKILLS_DIR" ]; then
ln -sf "$AGENTCAP_SKILLS_DIR/agents/AGENTS.md" "$PWD/AGENTS.md"
fi

# Toolchain mount (agentcap --tool-dir): prepend the bundle's bin/ so the
# agent's task work can call the tools in it. The bundle is bind-mounted
# (read-only) at its host path.
# ${PATH:+:$PATH} appends the previous PATH only when it is set and non-empty.
# A bare "$AGENTCAP_TOOL_BIN:$PATH" would leave a trailing ':' when PATH is
# empty or unset; that empty entry makes the shell search the current directory.
if [ -n "${AGENTCAP_TOOL_BIN:-}" ] && [ -d "$AGENTCAP_TOOL_BIN" ]; then
    export PATH="$AGENTCAP_TOOL_BIN${PATH:+:$PATH}"
fi

# Record this shell's PID so the sandbox can target the about-to-be
# exec'd agent precisely on timeout. ``exec`` keeps $$, so the value
# stays valid after the replacement.
Expand Down
6 changes: 6 additions & 0 deletions containers/agentcap-opencode/agentcap-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ if [ -n "${AGENTCAP_SKILLS_DIR:-}" ] && [ -d "$AGENTCAP_SKILLS_DIR" ]; then
ln -sfn "$AGENTCAP_SKILLS_DIR/skills" "$PWD/skills"
fi

# Toolchain mount (agentcap --tool-dir): prepend the bundle's bin/ so the
# agent's task work can call the tools in it. The bundle is bind-mounted
# (read-only) at its host path.
# ${PATH:+:$PATH} appends the previous PATH only when it is set and non-empty.
# A bare "$AGENTCAP_TOOL_BIN:$PATH" would leave a trailing ':' when PATH is
# empty or unset; that empty entry makes the shell search the current directory.
if [ -n "${AGENTCAP_TOOL_BIN:-}" ] && [ -d "$AGENTCAP_TOOL_BIN" ]; then
    export PATH="$AGENTCAP_TOOL_BIN${PATH:+:$PATH}"
fi

# Record this shell's PID so the sandbox can target the about-to-be
# exec'd agent precisely on timeout. ``exec`` keeps $$.
echo $$ > /tmp/agentcap-current.pid
Expand Down
6 changes: 6 additions & 0 deletions containers/agentcap-pi/agentcap-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ if [ -n "${AGENTCAP_SKILLS_DIR:-}" ] && [ -d "$AGENTCAP_SKILLS_DIR" ]; then
ln -sfn "$AGENTCAP_SKILLS_DIR/skills" "$PWD/skills"
fi

# Toolchain mount (agentcap --tool-dir): prepend the bundle's bin/ so the
# agent's task work can call the tools in it. The bundle is bind-mounted
# (read-only) at its host path.
# ${PATH:+:$PATH} appends the previous PATH only when it is set and non-empty.
# A bare "$AGENTCAP_TOOL_BIN:$PATH" would leave a trailing ':' when PATH is
# empty or unset; that empty entry makes the shell search the current directory.
if [ -n "${AGENTCAP_TOOL_BIN:-}" ] && [ -d "$AGENTCAP_TOOL_BIN" ]; then
    export PATH="$AGENTCAP_TOOL_BIN${PATH:+:$PATH}"
fi

# Record this shell's PID so the sandbox can target the about-to-be
# exec'd agent precisely on timeout. ``exec`` keeps $$.
echo $$ > /tmp/agentcap-current.pid
Expand Down
7 changes: 6 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ enum Cmd {
/// Host dir with a huggingface/skills checkout (bind-mounted read-only).
#[arg(long)]
skills: Option<String>,
/// Host dir of a self-contained toolchain (e.g. a relocatable venv);
/// bind-mounted read-only and its `bin/` prepended to the agent's PATH.
#[arg(long)]
tool_dir: Option<String>,
/// Plain-text file: one prompt per line (# comments + blanks ignored).
#[arg(long)]
tasks: String,
Expand Down Expand Up @@ -87,12 +91,13 @@ fn main() -> Result<()> {
api_key,
sandbox,
skills,
tool_dir,
tasks,
turns,
followup,
timeout,
} => agentcap::run::run(
agent, model, upstream, api_key, sandbox, skills, tasks, turns, followup, timeout,
agent, model, upstream, api_key, sandbox, skills, tool_dir, tasks, turns, followup, timeout,
),
Cmd::Ls { workspace, long } => agentcap::ls::run(workspace, long),
Cmd::Export {
Expand Down
39 changes: 39 additions & 0 deletions src/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub fn run(
api_key: Option<String>,
sandbox_dir: Option<String>,
skills_dir: Option<String>,
tool_dir: Option<String>,
tasks_file: String,
turns: i64,
followup: String,
Expand Down Expand Up @@ -103,6 +104,12 @@ pub fn run(
env.insert("AGENTCAP_SKILLS_DIR".into(), skills_abs.clone());
readonly.push(PathBuf::from(skills_abs));
}
// A self-contained toolchain dir, mounted read-only at its host path.
if let Some(t) = &tool_dir {
let (tool_bin, mount) = tool_dir_wiring(t);
env.insert("AGENTCAP_TOOL_BIN".into(), tool_bin);
readonly.push(mount);
}
let writable: Vec<PathBuf> = vec![
PathBuf::from(abs(&traces)),
PathBuf::from(abs(&state)),
Expand Down Expand Up @@ -166,6 +173,16 @@ fn abs(p: &Path) -> String {
.into_owned()
}

/// Derives the sandbox wiring for `--tool-dir`.
///
/// Returns `(tool_bin, mount)`: `tool_bin` is the bundle's `bin/` directory (the
/// `AGENTCAP_TOOL_BIN` value) and `mount` is the bundle *root* (the read-only
/// mount). The root is mounted rather than just `bin/` so the interpreter and
/// libs that `bin/` shebangs into come along; both paths are passed through
/// `abs` so the src==dst bind keeps those shebangs valid in-container.
fn tool_dir_wiring(tool_dir: &str) -> (String, PathBuf) {
    let mount = PathBuf::from(abs(Path::new(tool_dir)));
    let tool_bin = abs(&mount.join("bin"));
    (tool_bin, mount)
}

fn is_hf_router(upstream: &str) -> bool {
url::Url::parse(upstream)
.ok()
Expand Down Expand Up @@ -235,3 +252,25 @@ fn write_run_json(
std::fs::write(workdir.join("run.json"), serde_json::to_string_pretty(&summary)?)?;
Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// An absolute bundle path yields `bin/` as the PATH entry and the bundle
    /// root as the mount.
    #[test]
    fn tool_dir_wiring_points_at_bin_and_mounts_root() {
        let (path_entry, bundle_root) = tool_dir_wiring("/opt/toolbox");
        // Only bin/ goes on PATH, but the mount is the whole bundle
        // (interpreter + libs), not just bin/.
        assert_eq!(path_entry, "/opt/toolbox/bin");
        assert_eq!(bundle_root, Path::new("/opt/toolbox"));
    }

    /// A relative `--tool-dir` argument must still produce absolute paths,
    /// because the src==dst mount only works with absolute paths.
    #[test]
    fn tool_dir_wiring_absolutizes_relative_paths() {
        let (bin, mount) = tool_dir_wiring("toolbox");
        let bin_path = Path::new(&bin);
        assert!(mount.is_absolute(), "mount not absolute: {mount:?}");
        assert!(bin_path.is_absolute(), "bin not absolute: {bin}");
        assert!(bin.ends_with("toolbox/bin"), "bin not under toolbox/bin: {bin}");
    }
}
73 changes: 73 additions & 0 deletions tests/live.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
//! AGENTCAP_TEST_LLM_URL=http://127.0.0.1:8000 cargo test --test live -- --ignored
//! Each test skips (passes) if no server is reachable.

use std::collections::BTreeMap;
use std::process::Command;
use std::time::Duration;

Expand Down Expand Up @@ -40,6 +41,14 @@ fn upstream() -> Option<String> {
None
}

/// Reports whether some directory listed in `PATH` contains a file named
/// `podman`. The tool-dir test needs only podman (no model server), so it
/// gates on this rather than [`upstream`]. An unset `PATH` counts as "no".
fn podman_available() -> bool {
    match std::env::var_os("PATH") {
        Some(search_path) => std::env::split_paths(&search_path)
            .map(|dir| dir.join("podman"))
            .any(|candidate| candidate.is_file()),
        None => false,
    }
}

/// Last `n` chars of `s`, for failure dumps.
fn tail(s: &str, n: usize) -> String {
let start = s.char_indices().rev().take(n).last().map(|(i, _)| i).unwrap_or(0);
Expand Down Expand Up @@ -72,6 +81,10 @@ fn diagnostics(run_dir: &std::path::Path, summary: &Value, bin_stderr: &[u8]) ->
/// `agentcap run --agent <agent>` against the live server; assert the run dir,
/// run.json shape, captures, and (for pi) the streamed JSONL trace.
fn run_agent(agent: &str, expect_jsonl_traces: bool) {
if !podman_available() {
eprintln!("skip live[{agent}]: no podman on PATH");
return;
}
let Some(upstream) = upstream() else {
eprintln!("skip live[{agent}]: no llama server (set AGENTCAP_TEST_LLM_URL or run one on :8000/:8080)");
return;
Expand Down Expand Up @@ -168,6 +181,66 @@ fn live_goose() {
run_agent("goose", false);
}

/// `--tool-dir` at the sandbox layer: the bundle is mounted read-only and its
/// `bin/` ends up on the agent's PATH. The sandbox is driven directly (no model)
/// so the mount + init-script wiring is asserted deterministically; how `run()`
/// derives this env from `--tool-dir` is unit-tested in `run.rs`. Needs only
/// podman (any agent image — the tool-dir init block is identical across all four).
#[test]
#[ignore = "live: needs podman + a built per-agent image"]
fn live_tool_dir_mount() {
    use std::os::unix::fs::PermissionsExt;

    if !podman_available() {
        eprintln!("skip live[tool-dir]: no podman on PATH");
        return;
    }

    // Build a self-contained bundle whose bin/greet prints a sentinel.
    let scratch = tempfile::tempdir().unwrap();
    let bundle = scratch.path().join("toolbox");
    let bundle_bin = bundle.join("bin");
    std::fs::create_dir_all(&bundle_bin).unwrap();
    let greet = bundle_bin.join("greet");
    std::fs::write(&greet, "#!/bin/sh\necho TOOLDIR_SENTINEL_OK\n").unwrap();
    let executable = std::fs::Permissions::from_mode(0o755);
    std::fs::set_permissions(&greet, executable).unwrap();

    // Mirror the env run() derives for this bundle: AGENTCAP_TOOL_BIN points at
    // bin/, while the whole bundle is what gets mounted read-only at its host path.
    let mut env: BTreeMap<String, String> = BTreeMap::new();
    env.insert(
        "AGENTCAP_TOOL_BIN".to_string(),
        bundle_bin.to_string_lossy().into_owned(),
    );
    let sandbox = agentcap::sandbox::require_sandbox(
        "pi",
        env.clone(),
        vec![bundle.clone()],
        vec![],
        &|m| eprintln!(" [sandbox] {m}"),
    )
    .expect("provision pi sandbox");

    // `command -v` prints where greet resolved from; then actually run it.
    let probe = ["sh", "-c", "command -v greet; greet"].map(String::from);
    let out = match sandbox.run(&probe, &env, None, Some(Duration::from_secs(600))) {
        Ok(out) => out,
        Err(agentcap::sandbox::SandboxError::Timeout) => panic!("tool-dir probe timed out"),
        Err(agentcap::sandbox::SandboxError::Other(e)) => panic!("tool-dir probe errored: {e}"),
    };

    assert_eq!(
        out.code, 0,
        "probe rc={}\n--- stdout ---\n{}\n--- stderr ---\n{}",
        out.code, out.stdout, out.stderr
    );
    assert!(
        out.stdout.contains("/bin/greet"),
        "bundle bin/ not on PATH\nstdout:\n{}",
        out.stdout
    );
    assert!(
        out.stdout.contains("TOOLDIR_SENTINEL_OK"),
        "mounted tool not runnable\nstdout:\n{}",
        out.stdout
    );
}

// hermes and opencode are intentionally omitted — neither runs via `agentcap run`
// on the tiny CI model:
// - hermes: its base system prompt (~3.9k tokens) exceeds the budget on
Expand Down
Loading