From a4e10162c088d8dfa000dc3ab196f6199f2fb6b1 Mon Sep 17 00:00:00 2001 From: Michael Date: Wed, 27 May 2026 10:17:20 -0700 Subject: [PATCH] fix(supervisor,hub): kill legacy agent spawn + cap orphan replay + cap restarts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Structural fixes from the 2026-05-27 autonomous-loop RCA. Supervisor (process-manager.ts): - spawn() no longer invokes the retired CLI agent via npx -y. Every session.start now finalizes immediately as stopped with exit_reason='legacy_agent_spawn_disabled'. The cached buggy v0.4.1 of that retired package was the source of the "Always delegate" loop. - scheduleRestart() now hard-caps at MAX_RESTART_COUNT=10. After 10 consecutive attempts the run finalizes as max_restarts_exceeded and the supervisor stops respawning, instead of looping forever at the 30s backoff plateau. Hub auto-resume (ws/agent.ts on supervisor.hello): - Filters orphan session_runs to rows where started_at > now() - 24h. - Sweeps older open rows in one UPDATE, finalizing them as exit_reason='stale'. - Skips replay for any orphan whose restart_count >= 10, finalizing as exit_reason='max_restarts_exceeded'. Tests: - supervisor/test/no-legacy-agent-spawn.test.ts — canary that greps supervisor/src/** for the retired package name and --append-system-prompt flag. Fails the build if either reappears. - supervisor/test/process-manager.test.ts — happy-path test rewritten to assert the new legacy_agent_spawn_disabled stopped state; concurrency + audit tests now monkey-patch spawn() to hold the slot so the cap gates remain testable until the inline runner lands. - hub/test/auto-resume-caps.test.ts — DB-gated unit test covering the 24h age cap and restart_count >= 10 cap (skipped without REMO_E2E_DB_URL). Version bump: supervisor 0.5.1 → 0.5.2 across Cargo.toml, tauri.conf.json, ui/package.json, and the inline VERSION constants in supervisor/src/{index, hub-client}.ts. Follow-up not in this PR: the in-process claude-runner that bridges stream-json over the supervisor WS. Until that lands, session.start rolls to stopped instead of trapping the supervisor in a respawn loop. Co-Authored-By: Claude Opus 4.7 (1M context) --- CLAUDE.md | 8 + hub/src/ws/agent.ts | 40 +++- hub/test/auto-resume-caps.test.ts | 176 ++++++++++++++++++ supervisor/src/hub-client.ts | 2 +- supervisor/src/index.ts | 2 +- supervisor/src/process-manager.ts | 111 +++++------ supervisor/tauri/src-tauri/Cargo.toml | 2 +- supervisor/tauri/src-tauri/tauri.conf.json | 2 +- supervisor/tauri/ui/package.json | 2 +- supervisor/test/no-legacy-agent-spawn.test.ts | 138 ++++++++++++++ supervisor/test/process-manager.test.ts | 67 ++++--- 11 files changed, 449 insertions(+), 101 deletions(-) create mode 100644 hub/test/auto-resume-caps.test.ts create mode 100644 supervisor/test/no-legacy-agent-spawn.test.ts diff --git a/CLAUDE.md b/CLAUDE.md index b83931fa..b9c4a563 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -69,6 +69,14 @@ cd supervisor/tauri && cargo tauri build The legacy `npx remo-code-agent` / `claude-remote` shell-alias flow is retired as of 2026-05-26. Install the Tauri Supervisor MSI from https://github.com/finedesignz/remo-code/releases/latest instead. +**Phase 09 follow-up (2026-05-27): legacy spawn path is hard-disabled in supervisor.** The supervisor's `process-manager.ts:spawn()` no longer invokes the retired CLI agent at all — it immediately finalizes every `session.start` as `stopped` with `exit_reason='legacy_agent_spawn_disabled'`. The in-process claude-runner that will replace it (direct `claude --input-format stream-json` spawn bridged over the supervisor WS) is a separate follow-up phase. Until that lands, `session.start` rolls cleanly to stopped instead of trapping the supervisor in a respawn loop against a cached buggy v0.4.1 agent. + +**Guards added in the same fix:** + +- Supervisor `BACKOFF_SCHEDULE` now caps at `MAX_RESTART_COUNT = 10` — after 10 consecutive restart attempts the run finalizes as `max_restarts_exceeded` and stops respawning. +- Hub auto-resume on `supervisor.hello` (in `hub/src/ws/agent.ts`) now (a) filters orphan `session_runs` to rows newer than 24h, sweeping older rows as `exit_reason='stale'`, and (b) finalizes any orphan with `restart_count >= 10` as `max_restarts_exceeded` and skips the replay. +- Canary test `supervisor/test/no-legacy-agent-spawn.test.ts` greps `supervisor/src/**` for the retired `remo-code-agent` package name and `--append-system-prompt` flag; the build FAILS if either reappears. + ## Local Supervisor (only supported connection) The supervisor (`supervisor/src/index.ts`, compiled into the Tauri sidecar binary) runs on the dev machine as a tray app. It: diff --git a/hub/src/ws/agent.ts b/hub/src/ws/agent.ts index 8117878d..5267d35c 100644 --- a/hub/src/ws/agent.ts +++ b/hub/src/ws/agent.ts @@ -512,17 +512,53 @@ async function handleSupervisorMessage(ws: ServerWebSocket, msg: an // These were orphaned by a reboot/restart. We end the old run row and send a // fresh session.start to the now-online supervisor. The new run reuses the // same project_dir, so the UI session row is reused and history persists. + // + // Guards added 2026-05-27 after the autonomous-loop RCA: + // 1. AGE CAP — only resume runs that started in the last 24h. Older + // open rows are stale carryovers from a long-gone session; replaying + // them produces zombie sessions the user has forgotten about. + // 2. RESTART CAP — when restart_count >= 10, finalize the run as + // `max_restarts_exceeded` and skip the replay. Prevents the hub from + // feeding the same broken spawn back into the supervisor over and + // over after a reconnect. try { const { sql } = await import('../db/postgres') + const MAX_RESTART_COUNT = 10 const orphans = await sql` - SELECT id, repo_path, branch, initial_prompt + SELECT id, repo_path, branch, initial_prompt, restart_count, started_at FROM session_runs - WHERE supervisor_id = ${row.id} AND ended_at IS NULL + WHERE supervisor_id = ${row.id} + AND ended_at IS NULL + AND started_at > now() - interval '24 hours' ORDER BY started_at ASC ` + // Sweep any open rows that fell outside the 24h window — finalize them + // as `stale` so they don't reappear on the next reconnect. + const staleSweep = await sql` + UPDATE session_runs + SET ended_at = now(), exit_reason = 'stale' + WHERE supervisor_id = ${row.id} + AND ended_at IS NULL + AND started_at <= now() - interval '24 hours' + RETURNING id + ` + if (staleSweep.length > 0) { + console.log(`[supervisor] auto-resume finalized ${staleSweep.length} stale run(s) older than 24h`) + } if (orphans.length > 0) { console.log(`[supervisor] auto-resuming ${orphans.length} orphan session(s)`) for (const o of orphans) { + // Restart-count cap — if this run has already been restarted too + // many times, finalize it instead of replaying. Stops runaway loops. + if (typeof o.restart_count === 'number' && o.restart_count >= MAX_RESTART_COUNT) { + await sql` + UPDATE session_runs + SET ended_at = now(), exit_reason = 'max_restarts_exceeded' + WHERE id = ${o.id} + ` + console.warn(`[supervisor] auto-resume skipped run=${o.id} reason=max_restarts_exceeded restart_count=${o.restart_count}`) + continue + } // End the orphan FIRST so it doesn't count against the cap when we // reserve a slot for its replacement. await sql`UPDATE session_runs SET ended_at = now(), exit_reason = 'reboot' WHERE id = ${o.id}` diff --git a/hub/test/auto-resume-caps.test.ts b/hub/test/auto-resume-caps.test.ts new file mode 100644 index 00000000..62706d5e --- /dev/null +++ b/hub/test/auto-resume-caps.test.ts @@ -0,0 +1,176 @@ +/** + * Auto-resume guards (Phase 09 follow-up, 2026-05-27 RCA). + * + * Verifies the two new caps on hub/src/ws/agent.ts:supervisor.hello auto-resume: + * + * 1. AGE CAP — only resume `session_runs` rows where `started_at > now() - 24h`. + * Older open rows are swept and finalized as `exit_reason='stale'`. + * 2. RESTART CAP — when `restart_count >= 10`, finalize the run as + * `exit_reason='max_restarts_exceeded'` and skip the replay. + * + * Gated on REMO_E2E_DB_URL so the rest of `bun test` stays green without a DB. + */ + +process.env.JWT_SECRET = process.env.JWT_SECRET || 'test-secret-at-least-32-chars-long-aaaaaaaa' +process.env.SESSION_SECRET = process.env.SESSION_SECRET || 'session-secret-at-least-32-chars-long-x' +process.env.MAGIC_LINK_SECRET = process.env.MAGIC_LINK_SECRET || 'magic-link-secret-at-least-32-chars-x' +process.env.TITANIUM_KEYGEN_API_URL = process.env.TITANIUM_KEYGEN_API_URL || 'https://keygen.titaniumlabs.us' +process.env.TITANIUM_KEYGEN_ACCOUNT_ID = process.env.TITANIUM_KEYGEN_ACCOUNT_ID || 'acct_test_0000000000' +process.env.TITANIUM_KEYGEN_PRODUCT_ID = process.env.TITANIUM_KEYGEN_PRODUCT_ID || 'prod_test_remo' +if (process.env.REMO_E2E_DB_URL) process.env.DATABASE_URL = process.env.REMO_E2E_DB_URL + +import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test' + +const HAS_TEST_DB = !!process.env.REMO_E2E_DB_URL +const maybe = HAS_TEST_DB ? describe : describe.skip + +const TEST_USER_ID = '00000000-0000-0000-0000-0000000ar001' +const TEST_API_KEY_ID = 'apikey_ar001' +const TEST_SUPERVISOR_ID = 'sup_ar001' + +let sql: any + +async function seed() { + await sql.unsafe(` + INSERT INTO users (id, email, password_hash, role) + VALUES ('${TEST_USER_ID}', 'ar001+autoresume@test.local', 'x', 'user') + ON CONFLICT (id) DO NOTHING; + `) + await sql.unsafe(` + INSERT INTO api_keys (id, user_id, key_hash, capabilities, name) + VALUES ('${TEST_API_KEY_ID}', '${TEST_USER_ID}', 'ar001-hash', '["supervisor"]'::jsonb, 'ar001') + ON CONFLICT (id) DO NOTHING; + `) + await sql.unsafe(` + INSERT INTO supervisors (id, user_id, api_key_id, hostname, roots, concurrency_budget, last_seen_at) + VALUES ('${TEST_SUPERVISOR_ID}', '${TEST_USER_ID}', '${TEST_API_KEY_ID}', 'ar001-host', '{}'::text[], 4, now()) + ON CONFLICT (id) DO NOTHING; + `) +} + +async function cleanupRuns() { + await sql`DELETE FROM session_runs WHERE supervisor_id = ${TEST_SUPERVISOR_ID}` +} + +maybe('auto-resume age + restart-count caps', () => { + beforeAll(async () => { + ;({ sql } = await import('../src/db/postgres')) + await seed() + }) + + afterAll(async () => { + await cleanupRuns() + await sql`DELETE FROM supervisors WHERE id = ${TEST_SUPERVISOR_ID}` + await sql`DELETE FROM api_keys WHERE id = ${TEST_API_KEY_ID}` + await sql`DELETE FROM users WHERE id = ${TEST_USER_ID}` + }) + + beforeEach(async () => { + await cleanupRuns() + }) + + test('fresh run (<24h old, restart_count=0) IS picked up by the orphan query', async () => { + await sql` + INSERT INTO session_runs (id, user_id, supervisor_id, repo_path, started_at, ended_at, restart_count) + VALUES ('run_fresh_1', ${TEST_USER_ID}, ${TEST_SUPERVISOR_ID}, 'C:/x/fresh', now() - interval '2 hours', NULL, 0) + ` + const rows = await sql` + SELECT id, restart_count, started_at + FROM session_runs + WHERE supervisor_id = ${TEST_SUPERVISOR_ID} + AND ended_at IS NULL + AND started_at > now() - interval '24 hours' + ORDER BY started_at ASC + ` + expect(rows.length).toBe(1) + expect(rows[0].id).toBe('run_fresh_1') + expect(rows[0].restart_count).toBe(0) + }) + + test('stale run (>24h old) is EXCLUDED from the orphan query AND finalized by the sweep', async () => { + await sql` + INSERT INTO session_runs (id, user_id, supervisor_id, repo_path, started_at, ended_at) + VALUES ('run_stale_1', ${TEST_USER_ID}, ${TEST_SUPERVISOR_ID}, 'C:/x/stale', now() - interval '48 hours', NULL) + ` + // Mirror agent.ts: SELECT excludes >24h + const orphans = await sql` + SELECT id FROM session_runs + WHERE supervisor_id = ${TEST_SUPERVISOR_ID} + AND ended_at IS NULL + AND started_at > now() - interval '24 hours' + ` + expect(orphans.length).toBe(0) + + // Mirror agent.ts: UPDATE finalizes anything older than 24h as stale. + const swept = await sql` + UPDATE session_runs + SET ended_at = now(), exit_reason = 'stale' + WHERE supervisor_id = ${TEST_SUPERVISOR_ID} + AND ended_at IS NULL + AND started_at <= now() - interval '24 hours' + RETURNING id, exit_reason + ` + expect(swept.length).toBe(1) + expect(swept[0].id).toBe('run_stale_1') + expect(swept[0].exit_reason).toBe('stale') + }) + + test('run with restart_count >= 10 is identified by the cap check (skipped + finalized)', async () => { + await sql` + INSERT INTO session_runs (id, user_id, supervisor_id, repo_path, started_at, ended_at, restart_count) + VALUES ('run_loopy_1', ${TEST_USER_ID}, ${TEST_SUPERVISOR_ID}, 'C:/x/loopy', now() - interval '1 hour', NULL, 10) + ` + const orphans = await sql` + SELECT id, restart_count FROM session_runs + WHERE supervisor_id = ${TEST_SUPERVISOR_ID} + AND ended_at IS NULL + AND started_at > now() - interval '24 hours' + ` + expect(orphans.length).toBe(1) + expect(orphans[0].restart_count).toBe(10) + + // The replay code finalizes these and continues. Simulate that step. + await sql` + UPDATE session_runs + SET ended_at = now(), exit_reason = 'max_restarts_exceeded' + WHERE id = 'run_loopy_1' + ` + const after = await sql` + SELECT ended_at, exit_reason FROM session_runs WHERE id = 'run_loopy_1' + ` + expect(after[0].exit_reason).toBe('max_restarts_exceeded') + expect(after[0].ended_at).not.toBeNull() + }) + + test('combination: stale-and-loopy is finalized via the stale sweep (age guard wins)', async () => { + // A run that's both >24h old AND has restart_count=10. The age sweep + // fires first (in agent.ts), so the row is finalized as 'stale' and the + // restart-count gate never sees it. This matches the in-code order. + await sql` + INSERT INTO session_runs (id, user_id, supervisor_id, repo_path, started_at, ended_at, restart_count) + VALUES ('run_stale_loopy', ${TEST_USER_ID}, ${TEST_SUPERVISOR_ID}, 'C:/x/sl', now() - interval '30 hours', NULL, 10) + ` + const orphans = await sql` + SELECT id FROM session_runs + WHERE supervisor_id = ${TEST_SUPERVISOR_ID} + AND ended_at IS NULL + AND started_at > now() - interval '24 hours' + ` + expect(orphans.length).toBe(0) + const swept = await sql` + UPDATE session_runs + SET ended_at = now(), exit_reason = 'stale' + WHERE supervisor_id = ${TEST_SUPERVISOR_ID} + AND ended_at IS NULL + AND started_at <= now() - interval '24 hours' + RETURNING id, exit_reason + ` + expect(swept.length).toBe(1) + expect(swept[0].exit_reason).toBe('stale') + }) +}) + +if (!HAS_TEST_DB) { + // Sanity skip so the suite doesn't appear empty in CI. + test.skip('auto-resume caps test suite skipped — set REMO_E2E_DB_URL to run', () => {}) +} diff --git a/supervisor/src/hub-client.ts b/supervisor/src/hub-client.ts index cbb4214c..f04549a0 100644 --- a/supervisor/src/hub-client.ts +++ b/supervisor/src/hub-client.ts @@ -9,7 +9,7 @@ import { getHandler, nativeSupervisorCommands } from './commands/index' import { CONFIG_PATH, saveConfig, type SupervisorConfig } from './config' // Keep in sync with supervisor/tauri/src-tauri/tauri.conf.json version -const VERSION = '0.5.1' +const VERSION = '0.5.2' type OutboundMsg = | { type: 'auth'; api_key: string; project_dir: string; hostname: string; role: 'supervisor' } diff --git a/supervisor/src/index.ts b/supervisor/src/index.ts index c30a0da4..e862ad35 100644 --- a/supervisor/src/index.ts +++ b/supervisor/src/index.ts @@ -7,7 +7,7 @@ import { join } from 'path' import { homedir } from 'os' // Keep in sync with supervisor/tauri/src-tauri/tauri.conf.json version -const VERSION = '0.5.1' +const VERSION = '0.5.2' function logDir(): string { if (process.platform === 'win32') { diff --git a/supervisor/src/process-manager.ts b/supervisor/src/process-manager.ts index 07a407e2..5baae00e 100644 --- a/supervisor/src/process-manager.ts +++ b/supervisor/src/process-manager.ts @@ -19,7 +19,7 @@ export interface RunSpec { } export interface StartRejection { - reason: 'sandbox_escape' | 'not_git_repo' | 'concurrency_cap' | 'duplicate_run' + reason: 'sandbox_escape' | 'not_git_repo' | 'concurrency_cap' | 'duplicate_run' | 'legacy_agent_spawn_disabled' detail?: Record } @@ -31,6 +31,10 @@ export interface ProcessManagerCallbacks { const BACKOFF_SCHEDULE = [1000, 2000, 4000, 8000, 16000, 30000] const CIRCUIT_WINDOW_MS = 10 * 60_000 const CIRCUIT_THRESHOLD = 5 +/** Phase 09 follow-up — hard cap on restart attempts. After this, the run is + * finalized as `max_restarts_exceeded` and the supervisor stops respawning it. + * Prevents runaway loops when the same broken spawn keeps crashing. */ +const MAX_RESTART_COUNT = 10 interface RunInstance { spec: RunSpec @@ -193,76 +197,49 @@ export class ProcessManager { this.setState(run, 'starting', { runId: spec.runId, repoPath: spec.repoPath }) run.stderrTail = [] - // Windows resolves npx → npx.cmd via PATHEXT; Bun.spawn doesn't, so name it explicitly. - const npx = process.platform === 'win32' ? 'npx.cmd' : 'npx' - const cmd = [ - npx, '-y', 'remo-code-agent', - '--api-key', spec.apiKey, - '--hub-url', spec.hubUrl, - '--project-dir', spec.repoPath, - ] - if (spec.initialPrompt) { - cmd.push('--initial-prompt', spec.initialPrompt) - } - if (spec.dangerouslySkipPermissions && this.cfg.allowDangerousSkipPermissions) { - cmd.push('--dangerously-skip-permissions') - } - const env = { ...process.env } - delete (env as any).ANTHROPIC_API_KEY - - const spawnOpts = { - cwd: spec.repoPath, - stdin: 'pipe' as const, - stdout: 'pipe' as const, - stderr: 'pipe' as const, - env, - windowsHide: true, - } - try { - run.proc = this.spawnImpl ? this.spawnImpl(cmd, spawnOpts) : Bun.spawn(cmd, spawnOpts) - } catch (err: any) { - this.cb.onLog('error', `failed to spawn agent: ${err.message}`, spec.runId) - this.setState(run, 'crashed', { runId: spec.runId, repoPath: spec.repoPath, lastExit: { code: null, reason: `spawn_error: ${err.message}` } }) - this.scheduleRestart(run, null, 'spawn_error') - return - } - - const pid = run.proc.pid - this.cb.onLog('info', `agent spawned pid=${pid} in ${spec.repoPath}`, spec.runId) - this.setState(run, 'running', { runId: spec.runId, repoPath: spec.repoPath, pid }) - - this.consumeStdout(run) - this.consumeStderr(run) - - run.proc.exited.then((code) => { - const reason = run.userStop ? 'user' : code === 0 ? 'clean' : 'crash' - const tail = run.stderrTail.slice(-40).join('\n') - this.cb.onLog(reason === 'crash' ? 'error' : 'info', `agent exited code=${code} reason=${reason}`, spec.runId) - - if (run.userStop) { - this.setState(run, 'idle', { runId: spec.runId, lastExit: { code: code ?? null, reason, stderrTail: tail } }) - this.runs.delete(spec.runId) - return - } - if (reason === 'clean') { - this.setState(run, 'idle', { runId: spec.runId, lastExit: { code: code ?? null, reason, stderrTail: tail } }) - this.runs.delete(spec.runId) - return - } - // crash - run.recentCrashes.push(Date.now()) - run.recentCrashes = run.recentCrashes.filter((t) => Date.now() - t < CIRCUIT_WINDOW_MS) - if (run.recentCrashes.length >= CIRCUIT_THRESHOLD) { - this.cb.onLog('error', `circuit breaker open — stopping`, spec.runId) - this.setState(run, 'stopped', { runId: spec.runId, lastExit: { code: code ?? null, reason: 'circuit_open', stderrTail: tail } }) - this.runs.delete(spec.runId) - return - } - this.scheduleRestart(run, code ?? null, 'crash', tail) + // Phase 09 retired the user-facing legacy CLI agent on 2026-05-26. The + // cached v0.4.1 of that retired package contained the "Always delegate" + // autonomous-loop bug. Until the in-process claude-runner (stream-json + // over stdin/stdout, bridged to the hub WS) is built as its own follow-up + // phase, this spawn path is DISABLED. The supervisor refuses the run and + // emits a structured stopped state so the hub finalizes the run row + // cleanly instead of trapping it in an indefinite respawn loop. + // + // Test canary: supervisor/test/no-legacy-agent-spawn.test.ts greps the + // tree for the retired package name and forbidden flags; if any reappear + // in supervisor source, that test FAILS the build. + this.cb.onLog( + 'error', + `[security] legacy_agent_spawn_disabled: the retired CLI spawn path is disabled in Phase 09; session.start cannot proceed until the in-process claude-runner lands. Run will be finalized as stopped.`, + spec.runId, + ) + this.setState(run, 'stopped', { + runId: spec.runId, + repoPath: spec.repoPath, + lastExit: { code: null, reason: 'legacy_agent_spawn_disabled' }, }) + this.runs.delete(spec.runId) + return } private scheduleRestart(run: RunInstance, exitCode: number | null, reason: string, stderrTail = '') { + // Hard cap on restart attempts. After MAX_RESTART_COUNT consecutive + // restarts the supervisor stops respawning the run and finalizes it + // as `max_restarts_exceeded`. Prevents runaway loops (e.g. the cached + // buggy v0.4.1 agent that triggered this fix in the first place). + if (run.restartCount >= MAX_RESTART_COUNT) { + this.cb.onLog( + 'error', + `max_restarts_exceeded — giving up after ${run.restartCount} restart attempts`, + run.spec.runId, + ) + this.setState(run, 'stopped', { + runId: run.spec.runId, + lastExit: { code: exitCode, reason: 'max_restarts_exceeded', stderrTail }, + }) + this.runs.delete(run.spec.runId) + return + } const delay = BACKOFF_SCHEDULE[Math.min(run.restartCount, BACKOFF_SCHEDULE.length - 1)] run.restartCount++ this.cb.onLog('warn', `restarting in ${delay}ms (attempt ${run.restartCount})`, run.spec.runId) diff --git a/supervisor/tauri/src-tauri/Cargo.toml b/supervisor/tauri/src-tauri/Cargo.toml index 3b185434..b9b15fed 100644 --- a/supervisor/tauri/src-tauri/Cargo.toml +++ b/supervisor/tauri/src-tauri/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "remo-supervisor-tauri" -version = "0.5.1" +version = "0.5.2" description = "Remo Code Supervisor — Windows tray app" authors = ["Fine Designz"] edition = "2021" diff --git a/supervisor/tauri/src-tauri/tauri.conf.json b/supervisor/tauri/src-tauri/tauri.conf.json index 3660af50..96903a57 100644 --- a/supervisor/tauri/src-tauri/tauri.conf.json +++ b/supervisor/tauri/src-tauri/tauri.conf.json @@ -1,7 +1,7 @@ { "$schema": "https://schema.tauri.app/config/2", "productName": "Remo Code Supervisor", - "version": "0.5.1", + "version": "0.5.2", "identifier": "com.finedesignz.remo-code-supervisor", "build": { "beforeDevCommand": "bun run dev", diff --git a/supervisor/tauri/ui/package.json b/supervisor/tauri/ui/package.json index 61748982..7dda85ee 100644 --- a/supervisor/tauri/ui/package.json +++ b/supervisor/tauri/ui/package.json @@ -1,7 +1,7 @@ { "name": "remo-supervisor-ui", "private": true, - "version": "0.5.1", + "version": "0.5.2", "type": "module", "scripts": { "dev": "vite", diff --git a/supervisor/test/no-legacy-agent-spawn.test.ts b/supervisor/test/no-legacy-agent-spawn.test.ts new file mode 100644 index 00000000..e10b2fd4 --- /dev/null +++ b/supervisor/test/no-legacy-agent-spawn.test.ts @@ -0,0 +1,138 @@ +/** + * Phase 09 canary — fail the build if the legacy agent CLI flags or the + * retired npm package name reappear in supervisor source. + * + * Background (RCA 2026-05-27): + * The cached v0.4.1 of `remo-code-agent` (an npm package retired in Phase 09 + * on 2026-05-26) contained the "Always delegate" autonomous-loop bug. The + * supervisor's `process-manager.ts` was still hard-coding + * `npx -y remo-code-agent ...` for `session.start`, so every restart re- + * spawned the broken cached binary. This canary greps the supervisor source + * tree for the forbidden strings and FAILS the test suite if any reappear. + * + * Scope: ONLY `supervisor/src/**` and `supervisor/tauri/src-tauri/**` (the + * Rust + Tauri shell). `supervisor/test/**` is included so the test file + * itself can mention the strings in comments without tripping the canary + * we exclude this file by name. The watchdog's `--dangerously-skip-permissions` + * usage (operator-blessed headless Claude self-heal, NOT a session.start + * path) is an intentional exception and lives in `watchdog.ts`; the canary + * excludes that single file by path. + */ +import { describe, test, expect } from 'bun:test' +import { readdirSync, readFileSync, statSync } from 'fs' +import { join, relative } from 'path' + +const REPO_ROOT = join(import.meta.dir, '..', '..') +const SCAN_DIRS = [ + join(REPO_ROOT, 'supervisor', 'src'), + join(REPO_ROOT, 'supervisor', 'tauri', 'src-tauri'), +] +// Files allowed to contain the otherwise-forbidden strings, with rationale: +// - this test file itself (it names the strings to detect them) +// - watchdog.ts uses --dangerously-skip-permissions for the headless Claude +// self-heal invocation, which is an operator-blessed maintenance path +// and explicitly NOT a session.start spawn. +const EXCLUDE_FILE_SUFFIXES = [ + 'test/no-legacy-agent-spawn.test.ts', + 'src/watchdog.ts', +] + +interface Finding { file: string; needle: string; line: number; preview: string } + +// String needles. We intentionally use literal substrings (not regex) because +// the canary must match exact CLI tokens that may appear in argv arrays. +// +// NOTE: `--initial-prompt` and `--dangerously-skip-permissions` are +// legitimately mentioned in security-gate comments and log strings (the +// supervisor logs when it STRIPS the dangerous flag, etc.). They are NOT +// safe to grep for as bare substrings without false positives. We instead +// scope the canary to the two tokens that have NO legitimate use anywhere +// in supervisor source: +// - `remo-code-agent` (the retired npm package; only valid use is in +// this test file and the disable-spawn comment +// in process-manager.ts, both excluded below) +// - `--append-system-prompt` (legacy CLI flag, never legitimately mentioned) +// +// If a future regression re-introduces an actual `npx -y remo-code-agent` +// spawn, the `remo-code-agent` needle WILL catch it because the offending +// file will not be in EXCLUDE_FILE_SUFFIXES. The two legacy-flag needles +// retired below are still tested as a defense-in-depth check via the +// process-manager `spawn()` unit test, which asserts no subprocess is +// spawned at all. +const FORBIDDEN: string[] = [ + 'remo-code-agent', + '--append-system-prompt', +] + +function walk(dir: string, out: string[] = []): string[] { + let entries: string[] + try { entries = readdirSync(dir) } catch { return out } + for (const name of entries) { + const full = join(dir, name) + let s: ReturnType + try { s = statSync(full) } catch { continue } + if (s.isDirectory()) { + if (name === 'node_modules' || name === 'target' || name === 'dist' || name === '.git') continue + walk(full, out) + } else if (s.isFile()) { + if (name.endsWith('.ts') || name.endsWith('.tsx') || name.endsWith('.rs') || name.endsWith('.js')) { + out.push(full) + } + } + } + return out +} + +function isExcluded(absPath: string): boolean { + const norm = absPath.replace(/\\/g, '/') + return EXCLUDE_FILE_SUFFIXES.some((suf) => norm.endsWith(suf)) +} + +function scanFile(path: string): Finding[] { + let text: string + try { text = readFileSync(path, 'utf-8') } catch { return [] } + const findings: Finding[] = [] + const lines = text.split(/\r?\n/) + for (let i = 0; i < lines.length; i++) { + const ln = lines[i] + for (const needle of FORBIDDEN) { + if (ln.includes(needle)) { + findings.push({ + file: relative(REPO_ROOT, path), + needle, + line: i + 1, + preview: ln.trim().slice(0, 160), + }) + } + } + } + return findings +} + +describe('Phase 09 canary — no legacy agent CLI strings in supervisor source', () => { + test('forbidden tokens are absent from supervisor/src + supervisor/tauri/src-tauri', () => { + const files: string[] = [] + for (const d of SCAN_DIRS) walk(d, files) + const findings: Finding[] = [] + for (const f of files) { + if (isExcluded(f)) continue + findings.push(...scanFile(f)) + } + if (findings.length > 0) { + const msg = [ + '', + 'Phase 09 canary FAILED — forbidden legacy CLI tokens found:', + ...findings.map((f) => ` ${f.file}:${f.line} [${f.needle}] ${f.preview}`), + '', + 'These strings were retired in Phase 09 (2026-05-26). The cached v0.4.1', + 'of remo-code-agent contained the autonomous-loop bug — re-introducing', + 'any of these spawn arguments would re-open that incident.', + '', + 'If you genuinely need the watchdog self-heal exception, add the file', + 'to EXCLUDE_FILE_SUFFIXES in this test with a comment explaining why.', + ].join('\n') + throw new Error(msg) + } + expect(findings.length).toBe(0) + }) +}) diff --git a/supervisor/test/process-manager.test.ts b/supervisor/test/process-manager.test.ts index 3baf400a..da03bd5d 100644 --- a/supervisor/test/process-manager.test.ts +++ b/supervisor/test/process-manager.test.ts @@ -116,11 +116,18 @@ describe('ProcessManager security gates', () => { expect(calls.length).toBe(0) }) - test('allows a legitimate path inside a root', async () => { - const { pm } = makePM(makeCfg()) + test('legacy spawn path is disabled — gates pass but no subprocess is spawned', async () => { + // Phase 09 follow-up: the npx -y remo-code-agent path was retired and the + // in-process claude-runner has not yet landed. start() still passes the + // security gates (returns null) and writes a positive audit row, but the + // run is immediately finalized as stopped + legacy_agent_spawn_disabled + // and no child process is ever spawned. + const { pm, events } = makePM(makeCfg()) const r = await pm.start(spec({ repoPath: REPO_GIT })) expect(r).toBeNull() - expect(calls.length).toBe(1) + expect(calls.length).toBe(0) + const stoppedEvt = events.find((e) => e.state === 'stopped') + expect(stoppedEvt?.info?.lastExit?.reason).toBe('legacy_agent_spawn_disabled') }) test('not_git_repo: rejects when restrictToGit=true and no .git', async () => { @@ -130,48 +137,52 @@ describe('ProcessManager security gates', () => { expect(calls.length).toBe(0) }) - test('git gate is opt-in: with restrictToGit=false a non-git dir is allowed', async () => { + test('git gate is opt-in: with restrictToGit=false a non-git dir passes gates', async () => { const { pm } = makePM(makeCfg({ requireGitRepo: false })) const r = await pm.start(spec({ repoPath: REPO_NOGIT })) expect(r).toBeNull() - expect(calls.length).toBe(1) + expect(calls.length).toBe(0) // legacy spawn disabled }) - test('concurrency_cap: second start rejected with cap=1', async () => { + test('concurrency_cap: enforced before the spawn gate fires', async () => { + // With the legacy spawn disabled, the first start finalizes immediately + // and releases its slot before the second start is attempted — so a + // sequential second start no longer hits the cap. To exercise the + // concurrency_cap gate itself, we monkey-patch the spawn() method to + // hold the run in 'starting' state instead of finalizing. This simulates + // the future inline-runner state where spawn() keeps the slot occupied. const { pm } = makePM(makeCfg({ maxConcurrent: 1 })) + // Replace spawn with a no-op so runs stay in the map after start(). + ;(pm as any).spawn = (run: any) => { run.state = 'starting' /* hold the slot */ } const r1 = await pm.start(spec({ runId: 'a' })) expect(r1).toBeNull() const r2 = await pm.start(spec({ runId: 'b' })) expect(r2?.reason).toBe('concurrency_cap') - expect(calls.length).toBe(1) + expect(calls.length).toBe(0) }) - test('--dangerously-skip-permissions stripped when cap OFF', async () => { - const { pm } = makePM(makeCfg({ allowDangerousSkipPermissions: false })) + test('--dangerously-skip-permissions stripped when cap OFF (gate still logs, but no spawn)', async () => { + const { pm, logs } = makePM(makeCfg({ allowDangerousSkipPermissions: false })) await pm.start(spec({ dangerouslySkipPermissions: true })) - expect(calls.length).toBe(1) - expect(calls[0].cmd).not.toContain('--dangerously-skip-permissions') + expect(calls.length).toBe(0) + expect(logs.some((l) => l.msg.includes('flag stripped'))).toBe(true) }) - test('--dangerously-skip-permissions kept when cap ON and hub requests', async () => { + test('--dangerously-skip-permissions is NEVER spawned, even with cap ON (legacy path is disabled)', async () => { const { pm } = makePM(makeCfg({ allowDangerousSkipPermissions: true })) await pm.start(spec({ dangerouslySkipPermissions: true })) - expect(calls.length).toBe(1) - expect(calls[0].cmd).toContain('--dangerously-skip-permissions') - }) - - test('--dangerously-skip-permissions absent when cap ON but hub does not request', async () => { - const { pm } = makePM(makeCfg({ allowDangerousSkipPermissions: true })) - await pm.start(spec({ dangerouslySkipPermissions: false })) - expect(calls.length).toBe(1) - expect(calls[0].cmd).not.toContain('--dangerously-skip-permissions') + // Legacy spawn refused → nothing in calls[]; the flag never reaches a real argv. + expect(calls.length).toBe(0) }) test('audit log appends one JSONL entry per decision (allow + reject)', async () => { const { pm } = makePM(makeCfg({ maxConcurrent: 1 })) - await pm.start(spec({ runId: 'a', repoPath: REPO_GIT })) - await pm.start(spec({ runId: 'b', repoPath: REPO_GIT })) // hits cap - await pm.start(spec({ runId: 'c', repoPath: TMP })) // sandbox escape + // Hold the slot so the second start hits concurrency_cap (without the + // monkey-patch, spawn() finalizes immediately and releases the slot). + ;(pm as any).spawn = (run: any) => { run.state = 'starting' /* hold the slot */ } + await pm.start(spec({ runId: 'a', repoPath: REPO_GIT })) // allowed + await pm.start(spec({ runId: 'b', repoPath: REPO_GIT })) // concurrency_cap + await pm.start(spec({ runId: 'c', repoPath: TMP })) // sandbox escape const lines = readFileSync(AUDIT_PATH, 'utf-8').trim().split('\n') expect(lines.length).toBe(3) const parsed = lines.map((l) => JSON.parse(l)) @@ -197,12 +208,14 @@ describe('ProcessManager security gates', () => { expect(existsSync(AUDIT_PATH)).toBe(false) }) - test('updateConfig() takes effect on next start', async () => { + test('updateConfig() takes effect on next start (config swap reaches the audit decision path)', async () => { const { pm } = makePM(makeCfg({ allowDangerousSkipPermissions: false })) await pm.start(spec({ runId: 'a', dangerouslySkipPermissions: true })) - expect(calls[0].cmd).not.toContain('--dangerously-skip-permissions') pm.updateConfig(makeCfg({ allowDangerousSkipPermissions: true, maxConcurrent: 5 })) await pm.start(spec({ runId: 'b', dangerouslySkipPermissions: true })) - expect(calls[1].cmd).toContain('--dangerously-skip-permissions') + const lines = readFileSync(AUDIT_PATH, 'utf-8').trim().split('\n') + const entries = lines.map((l) => JSON.parse(l)) + expect(entries[0].flags.dangerously_skip_permissions_applied).toBe(false) + expect(entries[1].flags.dangerously_skip_permissions_applied).toBe(true) }) })