From d461f832550fb82978604baedb9822977fcebfdf Mon Sep 17 00:00:00 2001 From: Chris Geyer Date: Wed, 18 Mar 2026 17:06:59 +0000 Subject: [PATCH 1/2] Add `roar agent` command with process tree tracking New CLI command for tracing AI agents and tools that modify the working tree. Unlike `roar run`, it does not require a clean git state or even a git repository. - `roar agent <command>` traces file I/O and records job_type="agent" - Process tree stored in job metadata, rendered by `roar show` - Git info captured best-effort (graceful when outside a repo) - Suppressed git stderr noise from provider's get_status Co-Authored-By: Claude Opus 4.6 (1M context) --- roar/cli/__init__.py | 1 + roar/cli/commands/_execution.py | 21 +++- roar/cli/commands/agent.py | 118 ++++++++++++++++++ roar/core/models/run.py | 2 +- roar/plugins/vcs/git.py | 4 +- roar/presenters/show_renderer.py | 42 ++++++- roar/services/execution/job_recording.py | 7 ++ roar/services/execution/provenance/service.py | 20 ++- 8 files changed, 207 insertions(+), 8 deletions(-) create mode 100644 roar/cli/commands/agent.py diff --git a/roar/cli/__init__.py b/roar/cli/__init__.py index 6ac85083..a9f26f3f 100644 --- a/roar/cli/__init__.py +++ b/roar/cli/__init__.py @@ -28,6 +28,7 @@ # Lazy command registry: maps command name to (module_path, command_name, short_help) # Short help is stored here to avoid importing commands just for --help LAZY_COMMANDS: dict[str, tuple[str, str, str]] = { + "agent": ("roar.cli.commands.agent", "agent", "Run an agent with provenance tracking"), "auth": ("roar.cli.commands.auth", "auth", "Manage authentication with GLaaS"), "build": ("roar.cli.commands.build", "build", "Run a build step before the main pipeline"), "config": ("roar.cli.commands.config", "config", "View or set configuration"), diff --git a/roar/cli/commands/_execution.py b/roar/cli/commands/_execution.py index 2a411804..d307ebe2 100644 --- a/roar/cli/commands/_execution.py +++ b/roar/cli/commands/_execution.py @@ -79,6 +79,25 @@ 
def validate_git_clean() -> str: return repo_root +def get_git_root_optional() -> str: + """Get git repo root if available, otherwise return cwd. + + Unlike ``validate_git_clean``, this does **not** require a git repo and + does **not** check for uncommitted changes. Suitable for ``roar agent`` + where the agent is expected to modify the working tree. + """ + import subprocess + + try: + return subprocess.check_output( + ["git", "rev-parse", "--show-toplevel"], + stderr=subprocess.DEVNULL, + text=True, + ).strip() + except (subprocess.CalledProcessError, FileNotFoundError): + return os.getcwd() + + def get_quiet_setting(quiet_flag: bool | None, repo_root: str | Path) -> bool: """ Get quiet setting from CLI flag or config. @@ -186,7 +205,7 @@ def execute_and_report( # Create run context hash_algos = cast(list[Literal["blake3", "sha256", "sha512", "md5"]], hash_algorithms) - job_type_literal = cast(Literal["run", "build"] | None, job_type) + job_type_literal = cast(Literal["run", "build", "agent"] | None, job_type) run_ctx = RunContext( roar_dir=ctx.roar_dir, repo_root=repo_root, diff --git a/roar/cli/commands/agent.py b/roar/cli/commands/agent.py new file mode 100644 index 00000000..9e218eea --- /dev/null +++ b/roar/cli/commands/agent.py @@ -0,0 +1,118 @@ +""" +Native Click implementation of the agent command. 
+ +Usage: roar agent [options] <command> +""" + +import click + +from ...core.tracer_modes import TRACER_MODE_VALUES +from ..context import RoarContext +from ..decorators import require_init +from ._execution import ( + execute_and_report, + get_git_root_optional, + get_hash_algorithms, + get_quiet_setting, +) + + +@click.command( + "agent", + context_settings={ + "ignore_unknown_options": True, + "allow_extra_args": True, + "allow_interspersed_args": False, + }, +) +@click.argument("args", nargs=-1, type=click.UNPROCESSED) +@click.option("-q", "--quiet", is_flag=True, default=None, help="Suppress output summary") +@click.option("-n", "--name", "step_name", help="Name for this step") +@click.option( + "--tracer", + "tracer_mode", + type=click.Choice(list(TRACER_MODE_VALUES)), + default=None, + help="Tracer backend policy for this run", +) +@click.option( + "--tracer-fallback/--no-tracer-fallback", + "tracer_fallback", + default=None, + help="Allow runtime fallback to another tracer backend", +) +@click.option("--hash", "hash_algorithms", multiple=True, help="Add hash algorithm") +@click.pass_obj +@require_init +def agent( + ctx: RoarContext, + args: tuple[str, ...], + quiet: bool | None, + step_name: str | None, + tracer_mode: str | None, + tracer_fallback: bool | None, + hash_algorithms: tuple[str, ...], +) -> None: + """Run an agent with provenance tracking. + + Like 'roar run' but does not require a clean git working tree. + Tracks all file I/O performed by the agent and its subprocesses. 
+ + \\b + Examples: + roar agent codex + roar agent bash ./my-agent-script.sh + roar agent python my_agent.py + """ + args_list = list(args) + + if not args_list or args_list[0] in ("-h", "--help"): + click.echo(_get_help_text()) + return + + repo_root = get_git_root_optional() + quiet_setting = get_quiet_setting(quiet, repo_root) + algorithms = get_hash_algorithms(list(hash_algorithms) if hash_algorithms else None) + + command = args_list + if not command: + click.echo(_get_help_text()) + raise click.ClickException("No command specified") + + exit_code = execute_and_report( + ctx=ctx, + command=command, + job_type="agent", + step_name=step_name, + quiet=quiet_setting, + hash_algorithms=algorithms, + repo_root=repo_root, + tracer_mode=tracer_mode, + tracer_fallback=tracer_fallback, + ) + + if exit_code != 0: + raise SystemExit(exit_code) + + +def _get_help_text() -> str: + """Get help text for the agent command.""" + return """Usage: roar agent [options] [args...] + +Run an agent with provenance tracking. + +Unlike 'roar run', this does not require a clean git working tree. +The agent and all its subprocesses are traced for file I/O. 
+ +Options: + --quiet, -q Suppress output summary + --tracer Tracer policy: auto, ebpf, preload, ptrace + --tracer-fallback Enable runtime tracer fallback + --no-tracer-fallback Disable runtime tracer fallback + --hash Add hash algorithm (can be repeated) + -n, --name Name for this step + +Examples: + roar agent codex + roar agent bash ./my-agent-script.sh + roar agent python my_agent.py""" diff --git a/roar/core/models/run.py b/roar/core/models/run.py index 21c21a52..c5138c85 100644 --- a/roar/core/models/run.py +++ b/roar/core/models/run.py @@ -16,7 +16,7 @@ # Type aliases HashAlgorithm = Literal["blake3", "sha256", "sha512", "md5"] -JobType = Literal["run", "build"] +JobType = Literal["run", "build", "agent"] class RunArguments(ImmutableModel): diff --git a/roar/plugins/vcs/git.py b/roar/plugins/vcs/git.py index 31d0e656..fe131fc9 100644 --- a/roar/plugins/vcs/git.py +++ b/roar/plugins/vcs/git.py @@ -115,7 +115,9 @@ def get_info(self, repo_root: str) -> VCSInfo: def get_status(self, repo_root: str) -> tuple[bool, list[str]]: """Get the git working tree status.""" try: - out = subprocess.check_output(["git", "status", "--porcelain=v1"], cwd=repo_root) + out = subprocess.check_output( + ["git", "status", "--porcelain=v1"], cwd=repo_root, stderr=subprocess.DEVNULL + ) lines = out.decode().splitlines() clean = len(lines) == 0 return clean, lines diff --git a/roar/presenters/show_renderer.py b/roar/presenters/show_renderer.py index 1031c763..fbbff1e5 100644 --- a/roar/presenters/show_renderer.py +++ b/roar/presenters/show_renderer.py @@ -134,6 +134,13 @@ def render_job( lines.append(f"\nCommand: {job['command']}") + # Agent process tree + meta = job.get("metadata") + if job.get("job_type") == "agent" and meta and isinstance(meta, dict): + process_tree = meta.get("process_tree") + if process_tree: + self._render_agent_process_tree(lines, process_tree) + # Git info if job.get("git_commit"): lines.append(f"\nGit commit: {job['git_commit']}") @@ -141,7 +148,6 @@ def 
render_job( lines.append(f"Git branch: {job['git_branch']}") # Metadata (what gets registered with GLaaS) - meta = job.get("metadata") if meta and isinstance(meta, dict): lines.append("\nMetadata:") @@ -259,6 +265,40 @@ def render_job( return "\n".join(lines) + @staticmethod + def _format_process_cmd(cmd: list[str] | None, max_len: int = 120) -> str: + """Format a command list as a single-line display string.""" + import shlex + + if not cmd: + return "(unknown)" + cmd_str = shlex.join(cmd).replace("\n", " ").replace(" ", " ") + if len(cmd_str) > max_len: + cmd_str = cmd_str[: max_len - 3] + "..." + return cmd_str + + @classmethod + def _render_agent_process_tree(cls, lines: list[str], tree: list[dict]) -> None: + """Render executed commands for an agent job.""" + if not tree: + return + + lines.append("\nExecuted commands:") + for node in tree: + cls._render_process_node(lines, node, depth=0) + + @classmethod + def _render_process_node( + cls, lines: list[str], node: dict, depth: int + ) -> None: + """Recursively render a process tree node with indentation.""" + cmd_str = cls._format_process_cmd(node.get("command")) + indent = " " + " " * depth + connector = "" if depth == 0 else "- " + lines.append(f"{indent}{connector}{cmd_str}") + for child in node.get("children", []): + cls._render_process_node(lines, child, depth + 1) + def render_artifact( self, artifact: dict, diff --git a/roar/services/execution/job_recording.py b/roar/services/execution/job_recording.py index 0154ea58..bb6810c7 100644 --- a/roar/services/execution/job_recording.py +++ b/roar/services/execution/job_recording.py @@ -557,6 +557,13 @@ def _build_metadata_json( except Exception: pass + # For agent jobs, include the process tree so `roar show` can render + # the top-level commands the agent spawned. 
+ if getattr(ctx, "job_type", None) == "agent": + process_tree = prov.get("processes") + if process_tree: + metadata["process_tree"] = process_tree + return json.dumps(metadata) if metadata else None def _build_telemetry_json(self, repo_root: str, start_time: float) -> str | None: diff --git a/roar/services/execution/provenance/service.py b/roar/services/execution/provenance/service.py index 4b0bcc63..a3003506 100644 --- a/roar/services/execution/provenance/service.py +++ b/roar/services/execution/provenance/service.py @@ -284,15 +284,27 @@ def _build_process_info(self, processes: list) -> list: return process_info def _get_git_info(self, repo_root: str) -> dict[str, Any]: - """Get git info via VCS provider.""" + """Get git info via VCS provider. + + Returns an empty dict if the repo_root is not inside a git repository + (e.g. when invoked via ``roar agent`` outside a repo). + """ try: - vcs = get_container().get_vcs_provider("git") + from ....integrations import get_vcs_provider + + vcs = get_vcs_provider("git") vcs_info = vcs.get_info(repo_root) except KeyError: - # Defensive fallback if plugin bootstrap/registration was skipped. from ....plugins.vcs.git import GitVCSProvider - vcs_info = GitVCSProvider().get_info(repo_root) + try: + vcs_info = GitVCSProvider().get_info(repo_root) + except Exception: + self.logger.debug("Git info unavailable for %s", repo_root) + return {} + except Exception: + self.logger.debug("Git info unavailable for %s", repo_root) + return {} return { "commit": vcs_info.commit, From a3f0e6d49af92df637143fc6196e22f81d3459e9 Mon Sep 17 00:00:00 2001 From: Chris Geyer Date: Wed, 18 Mar 2026 16:50:15 +0000 Subject: [PATCH 2/2] Add fork/exec process tracking to preload tracer The preload tracer previously only discovered processes when they performed file I/O. Short-lived children like `ls` were invisible. Add Fork and Exec variants to NativeTraceEvent. Interpose fork() in the C shim so the launcher learns about child processes immediately. 
A library constructor sends an Exec event after successful exec, carrying the new command from /proc/self/cmdline. The launcher reconciles out-of-order Exec-before-Fork by back-filling parent_pid when the Fork event arrives. Co-Authored-By: Claude Opus 4.6 (1M context) --- rust/crates/tracer-schema/src/lib.rs | 8 +++++ rust/tracers/preload/src/interpose.c | 31 ++++++++++++++++ rust/tracers/preload/src/lib.rs | 39 ++++++++++++++++++++ rust/tracers/preload/src/main.rs | 53 ++++++++++++++++++++++++++++ 4 files changed, 131 insertions(+) diff --git a/rust/crates/tracer-schema/src/lib.rs b/rust/crates/tracer-schema/src/lib.rs index f4d077ed..4a1ef353 100644 --- a/rust/crates/tracer-schema/src/lib.rs +++ b/rust/crates/tracer-schema/src/lib.rs @@ -15,6 +15,14 @@ pub enum NativeTraceEvent { thread_id: u32, path: String, }, + Fork { + parent_pid: u32, + child_pid: u32, + }, + Exec { + pid: u32, + command: Vec<String>, + }, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/rust/tracers/preload/src/interpose.c b/rust/tracers/preload/src/interpose.c index e5d6ce25..2c90588e 100644 --- a/rust/tracers/preload/src/interpose.c +++ b/rust/tracers/preload/src/interpose.c @@ -187,4 +187,35 @@ int creat(const char *path, mode_t mode) { } return ret; } + +/* ── fork/vfork interposition ─────────────────────────────── */ + +extern void roar_preload_emit_fork(unsigned int parent_pid, + unsigned int child_pid); + +pid_t fork(void) { + static pid_t (*real_fork)(void) = NULL; + if (real_fork == NULL) { + real_fork = (pid_t (*)(void))dlsym(RTLD_NEXT, "fork"); + } + if (real_fork == NULL) { + return -1; + } + + pid_t ret = real_fork(); + if (ret > 0) { + /* Parent side — successful fork. 
*/ + roar_preload_emit_fork((unsigned int)getpid(), (unsigned int)ret); + } + return ret; +} + +/* ── exec notification via constructor ────────────────────── */ + +extern void roar_preload_notify_exec(void); + +__attribute__((constructor)) static void roar_preload_ctor(void) { + roar_preload_notify_exec(); +} + #endif diff --git a/rust/tracers/preload/src/lib.rs b/rust/tracers/preload/src/lib.rs index 2156631a..e1060441 100644 --- a/rust/tracers/preload/src/lib.rs +++ b/rust/tracers/preload/src/lib.rs @@ -744,6 +744,45 @@ fn resolve_at_path(dirfd: c_int, path: *const c_char) -> Option { Some(format!("{base}/{path_s}")) } +// ── fork/exec event emission ────────────────────────────────────────────────── + +/// Called from C `fork()` interposition after a successful fork (parent side). +#[cfg_attr(not(target_os = "macos"), no_mangle)] +pub unsafe extern "C" fn roar_preload_emit_fork(parent_pid: u32, child_pid: u32) { + if in_hook() { + return; + } + with_hook_guard(|| { + send_event(&TraceEvent::Fork { + parent_pid, + child_pid, + }); + }); +} + +/// Called from C constructor after library load. Sends an Exec event so the +/// launcher learns the command of newly exec'd processes. +#[cfg_attr(not(target_os = "macos"), no_mangle)] +pub unsafe extern "C" fn roar_preload_notify_exec() { + // Only send if ROAR_PRELOAD_TRACE_SOCK is set (i.e. we're inside a traced tree). 
+ if trace_sock_path().is_none() { + return; + } + let pid = current_pid(); + let Ok(cmdline) = std::fs::read(format!("/proc/{pid}/cmdline")) else { + return; + }; + let command: Vec<String> = cmdline + .split(|b| *b == 0) + .filter(|s| !s.is_empty()) + .map(|s| String::from_utf8_lossy(s).into_owned()) + .collect(); + if command.is_empty() { + return; + } + send_event(&TraceEvent::Exec { pid, command }); +} + #[cfg_attr(not(target_os = "macos"), no_mangle)] pub unsafe extern "C" fn roar_preload_emit_path_flags(path: *const c_char, flags: c_int) { if in_hook() { diff --git a/rust/tracers/preload/src/main.rs b/rust/tracers/preload/src/main.rs index 1db12ce0..851f59b9 100644 --- a/rust/tracers/preload/src/main.rs +++ b/rust/tracers/preload/src/main.rs @@ -78,6 +78,59 @@ impl CollectorState { self.fd.mark_path_open(path.clone()); self.fd.mark_path_written_with_thread(path, thread_id); } + TraceEvent::Fork { + parent_pid, + child_pid, + } => { + self.ensure_process(parent_pid); + if let Some(existing) = self.processes.get_mut(&child_pid) { + // Exec event may have arrived first — fill in parent. + if existing.parent_pid.is_none() && child_pid != self.root_pid { + existing.parent_pid = Some(parent_pid); + } + } else { + let info = capture_process_info(child_pid, Some(parent_pid)) + .unwrap_or_else(|| ProcessInfo { + pid: child_pid, + parent_pid: Some(parent_pid), + command: self + .processes + .get(&parent_pid) + .map(|p| p.command.clone()) + .unwrap_or_default(), + env: HashMap::new(), + }); + self.processes.insert(child_pid, info); + } + } + TraceEvent::Exec { pid, command } => { + // Update the process entry with the post-exec command. + if let Some(proc_info) = self.processes.get_mut(&pid) { + proc_info.command = command; + } else { + // Exec arrived before Fork (common for fast children). + // Try /proc for parent; if unavailable, mark as pending — + // a later Fork event will fill in parent_pid. 
+ let parent = if pid == self.root_pid { + None + } else { + parent_pid_from_proc(pid) + }; + self.processes.insert( + pid, + ProcessInfo { + pid, + parent_pid: parent, + command, + env: if pid == self.root_pid { + self.root_env.clone() + } else { + HashMap::new() + }, + }, + ); + } + } } }