|
| 1 | +/** |
| 2 | + * Security code reviewer demo. |
| 3 | + * |
| 4 | + * Runs the github.com/shreyas-lyzr/security-agent GAP repo against any |
| 5 | + * target repository. Output is a single SECURITY_REVIEW.md fetched from |
| 6 | + * the harness workdir. |
| 7 | + * |
| 8 | + * Usage: |
| 9 | + * |
| 10 | + * # Public repo |
| 11 | + * ANTHROPIC_API_KEY=sk-ant-... bun run examples/security-agent.ts \ |
| 12 | + * https://github.com/<owner>/<repo> |
| 13 | + * |
| 14 | + * # Private repo — supply a GitHub PAT |
| 15 | + * ANTHROPIC_API_KEY=sk-ant-... GITHUB_TOKEN=ghp_... \ |
| 16 | + * bun run examples/security-agent.ts https://github.com/<owner>/<private> |
| 17 | + * |
| 18 | + * # Default target (a tiny intentionally-vulnerable demo) if no argv |
| 19 | + * ANTHROPIC_API_KEY=sk-ant-... bun run examples/security-agent.ts |
| 20 | + */ |
| 21 | +import { mkdir, writeFile } from "node:fs/promises"; |
| 22 | +import { join } from "node:path"; |
| 23 | +import { ComputerAgent, LocalSubstrate } from "computeragent"; |
| 24 | + |
// The harness cannot run without an Anthropic key, so bail out early when it
// is missing or empty.
const { ANTHROPIC_API_KEY } = process.env;
if (ANTHROPIC_API_KEY === undefined || ANTHROPIC_API_KEY === "") {
  console.error("Error: ANTHROPIC_API_KEY is not set.");
  process.exit(1);
}

// Optional GitHub token; an empty string means "not provided".
const { GITHUB_TOKEN = "" } = process.env;

// The first CLI argument selects the repository to audit; otherwise fall back
// to the default target.
const [, , cliTarget] = process.argv;
const TARGET = cliTarget ?? "https://github.com/OWASP/NodeGoat";

// Reports are written to a "security-reports" directory next to this script.
const scriptDir = import.meta.dir ?? __dirname;
const OUT = join(scriptDir, "security-reports");
await mkdir(OUT, { recursive: true });

// Startup banner summarising the effective configuration.
const tokenStatus = GITHUB_TOKEN
  ? "(set — private repos accessible)"
  : "(not set — public only)";
console.log(`Target: ${TARGET}`);
console.log(`GITHUB_TOKEN: ${tokenStatus}`);
console.log(`Output dir: ${OUT}\n`);
| 40 | + |
// Environment forwarded to the harness. The GitHub token is only included
// when one was actually supplied.
const harnessEnvs: Record<string, string> = { ANTHROPIC_API_KEY };
if (GITHUB_TOKEN) {
  harnessEnvs.GITHUB_TOKEN = GITHUB_TOKEN;
}

// `await using` ties the agent's lifetime to this module scope: it is
// disposed asynchronously once the script's top-level code finishes.
await using agent = new ComputerAgent({
  source: { type: "git", url: "github.com/shreyas-lyzr/security-agent" },
  harness: "claude-agent-sdk",
  runtime: new LocalSubstrate(),
  envs: harnessEnvs,
  options: {
    permissionMode: "bypassPermissions",
    settingSources: ["project"],
    maxTurns: 60,
  },
});
| 55 | + |
// Run-wide bookkeeping. The event loop fills these in and the summary /
// report-fetch code reads them afterwards.
const startedAt = Date.now();
let sessionId = "",
  harnessUrl = "",
  toolCalls = 0,
  endedReason = "";

// The instruction sent to the agent, assembled line by line.
const message = [
  `Audit the repository at ${TARGET} for security issues.`,
  "",
  "Steps:",
  "1. Clone it (use GITHUB_TOKEN if the repo is private and the env var is set)",
  "2. Walk the codebase across all nine categories from your skill",
  "3. Write SECURITY_REVIEW.md to the current working directory",
  "4. Reply with one line confirming the path",
  "",
  "Be thorough but realistic — every Critical finding must have an explicit exploit path. Skip noise.",
].join("\n");

// Echo what is about to be sent, followed by a horizontal rule.
const promptRule = "─".repeat(70);
console.log("Asking the agent to audit. This typically runs 1-5 minutes depending on repo size.\n");
console.log(`PROMPT:\n${message}\n${promptRule}\n`);
| 74 | + |
/**
 * Shorten `text` to at most `max` characters for console display, appending
 * "…" only when characters were actually dropped.
 */
const preview = (text: string, max: number): string =>
  text.length > max ? `${text.slice(0, max)}…` : text;

// Send the prompt; the returned handle is consumed below as an async stream
// of events.
const handle = agent.chat(message);

for await (const ev of handle) {
  if (ev.kind === "ca_session_started") {
    // Remember the session coordinates; they are needed later to fetch the
    // report from the harness.
    sessionId = ev.sessionId;
    harnessUrl = await agent.harnessUrl();
    console.log(`[session ${sessionId}]\n`);
  } else if (ev.kind === "sdk_message") {
    // The payload is untyped here, so it is inspected defensively.
    const p = ev.payload as Record<string, unknown>;
    if (p.type === "assistant") {
      const msg = p.message as
        | { content?: { type: string; text?: string; name?: string; input?: unknown }[] }
        | undefined;
      for (const block of msg?.content ?? []) {
        if (block.type === "text" && block.text) {
          // Stream assistant prose as-is, without adding newlines.
          process.stdout.write(block.text);
        } else if (block.type === "tool_use") {
          toolCalls++;
          // Truncation is decided on the full string, so an input that is
          // exactly 160 characters long does not get a spurious ellipsis.
          const inp = preview(JSON.stringify(block.input ?? {}), 160);
          console.log(`\n → ${block.name}(${inp})`);
        }
      }
    } else if (p.type === "user") {
      const msg = p.message as
        | { content?: { type: string; content?: unknown }[] }
        | undefined;
      for (const block of msg?.content ?? []) {
        if (block.type === "tool_result") {
          // JSON.stringify(undefined) yields undefined at runtime, so a tool
          // result without content must fall back to an empty string.
          const content =
            typeof block.content === "string"
              ? block.content
              : (JSON.stringify(block.content) ?? "");
          console.log(` ← ${preview(content, 200)}`);
        }
      }
    } else if (p.type === "result") {
      const u = p.usage as { input_tokens?: number; output_tokens?: number } | undefined;
      // Only treat the cost as a number when it really is one; toFixed would
      // throw on anything else.
      const cost = typeof p.total_cost_usd === "number" ? p.total_cost_usd : undefined;
      if (u || cost !== undefined) {
        const costText = cost !== undefined ? ` cost=$${cost.toFixed(4)}` : "";
        // "?" stands in for counts the harness did not report.
        console.log(`\n [usage: in=${u?.input_tokens ?? "?"} out=${u?.output_tokens ?? "?"}${costText}]`);
      }
    }
  } else if (ev.kind === "ca_session_ended") {
    // Keep the termination reason (plus any error detail) for the summary.
    endedReason = ev.reason + (ev.errorMessage ? ` — ${ev.errorMessage}` : "");
    console.log(`\n[ended: ${endedReason}]`);
  }
}
| 117 | + |
// Wall-clock duration of the whole run, in seconds with one decimal place.
const elapsedMs = Date.now() - startedAt;
const elapsed = (elapsedMs / 1000).toFixed(1);

// One-line run summary beneath a horizontal rule.
const summaryLine = [
  `Done in ${elapsed}s`,
  `${toolCalls} tool calls`,
  `status: ${endedReason}`,
].join(" • ");
console.log(`\n${"─".repeat(70)}`);
console.log(`${summaryLine}\n`);
| 121 | + |
/**
 * One entry of the harness file-tree listing.
 * NOTE(review): shape taken from how this script reads the response —
 * confirm against the harness API.
 */
type TreeEntry = { path: string; type: string; size: number };

/**
 * Choose the report among the listed entries: the largest file whose path
 * ends with "security_review.md" (case-insensitive). On equal sizes the
 * earliest entry wins. Returns undefined when no such file exists.
 */
function pickReportPath(entries: readonly TreeEntry[]): string | undefined {
  let best: TreeEntry | undefined;
  for (const entry of entries) {
    const isReport =
      entry.type === "file" && entry.path.toLowerCase().endsWith("security_review.md");
    if (isReport && (best === undefined || entry.size > best.size)) {
      best = entry;
    }
  }
  return best?.path;
}

/**
 * Locate SECURITY_REVIEW.md in the harness workdir and return its text.
 * (The agent may write it inside the cloned-target subdirectory, hence the
 * recursive tree listing.)
 *
 * @throws Error with a user-facing message when no session was started, the
 *   tree or file request fails, or no report file exists.
 */
async function downloadReport(): Promise<string> {
  // Without a session there is no valid URL to query.
  if (!sessionId || !harnessUrl) {
    throw new Error("✗ No harness session was started, so there is no report to fetch.");
  }
  const fsBase = `${harnessUrl}/v1/sessions/${sessionId}/fs`;

  const treeRes = await fetch(`${fsBase}/tree?depth=10`);
  if (!treeRes.ok) {
    throw new Error(`✗ Could not list the harness workdir (HTTP ${treeRes.status}).`);
  }
  const tree = (await treeRes.json()) as { entries?: TreeEntry[] };

  const reportPath = pickReportPath(tree.entries ?? []);
  if (!reportPath) {
    throw new Error(
      "✗ SECURITY_REVIEW.md not found in harness workdir.\n" +
        " The agent may have errored out or written under a different name.",
    );
  }

  const fileRes = await fetch(`${fsBase}/file?path=${encodeURIComponent(reportPath)}`);
  // An error response body must not be mistaken for the report itself.
  if (!fileRes.ok) {
    throw new Error(`✗ Could not download ${reportPath} (HTTP ${fileRes.status}).`);
  }
  return fileRes.text();
}

try {
  const body = await downloadReport();

  // Save with a safe local name derived from the target.
  const safeTargetName = TARGET.replace(/^https?:\/\//, "")
    .replace(/[^a-zA-Z0-9_-]+/g, "_")
    .slice(0, 80);
  const stamp = new Date().toISOString().slice(0, 10);
  const localPath = join(OUT, `${safeTargetName}_${stamp}.md`);
  await writeFile(localPath, body, "utf8");

  // Report the on-disk size in bytes; string length would count UTF-16 units.
  console.log(`✓ Report saved → ${localPath} (${Buffer.byteLength(body, "utf8")}b)`);
  console.log(`\nFirst 40 lines:\n${"─".repeat(70)}`);
  console.log(body.split("\n").slice(0, 40).map((l) => " " + l).join("\n"));
  console.log(`${"─".repeat(70)}\n(full report at ${localPath})`);
} catch (err: unknown) {
  console.error(err instanceof Error ? err.message : String(err));
  // Set the exit code instead of calling process.exit(): the script then ends
  // normally, so the `await using agent` resource is still disposed.
  process.exitCode = 1;
}
0 commit comments