From a4e10162c088d8dfa000dc3ab196f6199f2fb6b1 Mon Sep 17 00:00:00 2001
From: Michael <articulatedesigns@gmail.com>
Date: Wed, 27 May 2026 10:17:20 -0700
Subject: [PATCH] fix(supervisor,hub): kill legacy agent spawn + cap orphan
 replay + cap restarts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Structural fixes from the 2026-05-27 autonomous-loop RCA.

Supervisor (process-manager.ts):
- spawn() no longer invokes the retired CLI agent via npx -y. Every
  session.start now finalizes immediately as stopped with
  exit_reason='legacy_agent_spawn_disabled'. The cached buggy v0.4.1 of
  that retired package was the source of the "Always delegate" loop.
- scheduleRestart() now hard-caps at MAX_RESTART_COUNT=10. After 10
  consecutive attempts the run finalizes as max_restarts_exceeded and
  the supervisor stops respawning, instead of looping forever at the
  30s backoff plateau.

Hub auto-resume (ws/agent.ts on supervisor.hello):
- Filters orphan session_runs to rows where started_at > now() - 24h.
- Sweeps older open rows in one UPDATE, finalizing them as
  exit_reason='stale'.
- Skips replay for any orphan whose restart_count >= 10, finalizing as
  exit_reason='max_restarts_exceeded'.

Tests:
- supervisor/test/no-legacy-agent-spawn.test.ts — canary that greps
  supervisor/src/** for the retired package name and --append-system-prompt
  flag. Fails the build if either reappears.
- supervisor/test/process-manager.test.ts — happy-path test rewritten
  to assert the new legacy_agent_spawn_disabled stopped state; concurrency
  + audit tests now monkey-patch spawn() to hold the slot so the cap
  gates remain testable until the inline runner lands.
- hub/test/auto-resume-caps.test.ts — DB-gated unit test covering the
  24h age cap and restart_count >= 10 cap (skipped without REMO_E2E_DB_URL).

Version bump: supervisor 0.5.1 → 0.5.2 across Cargo.toml, tauri.conf.json,
ui/package.json, and the inline VERSION constants in supervisor/src/{index,
hub-client}.ts.

Follow-up not in this PR: the in-process claude-runner that bridges
stream-json over the supervisor WS. Until that lands, session.start
rolls to stopped instead of trapping the supervisor in a respawn loop.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                                     |   8 +
 hub/src/ws/agent.ts                           |  40 +++-
 hub/test/auto-resume-caps.test.ts             | 176 ++++++++++++++++++
 supervisor/src/hub-client.ts                  |   2 +-
 supervisor/src/index.ts                       |   2 +-
 supervisor/src/process-manager.ts             | 111 +++++------
 supervisor/tauri/src-tauri/Cargo.toml         |   2 +-
 supervisor/tauri/src-tauri/tauri.conf.json    |   2 +-
 supervisor/tauri/ui/package.json              |   2 +-
 supervisor/test/no-legacy-agent-spawn.test.ts | 138 ++++++++++++++
 supervisor/test/process-manager.test.ts       |  67 ++++---
 11 files changed, 449 insertions(+), 101 deletions(-)
 create mode 100644 hub/test/auto-resume-caps.test.ts
 create mode 100644 supervisor/test/no-legacy-agent-spawn.test.ts

diff --git a/CLAUDE.md b/CLAUDE.md
index b83931fa..b9c4a563 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -69,6 +69,14 @@ cd supervisor/tauri && cargo tauri build
 
 The legacy `npx remo-code-agent` / `claude-remote` shell-alias flow is retired as of 2026-05-26. Install the Tauri Supervisor MSI from https://github.com/finedesignz/remo-code/releases/latest instead.
 
+**Phase 09 follow-up (2026-05-27): legacy spawn path is hard-disabled in supervisor.** The supervisor's `process-manager.ts:spawn()` no longer invokes the retired CLI agent at all — it immediately finalizes every `session.start` as `stopped` with `exit_reason='legacy_agent_spawn_disabled'`. The in-process claude-runner that will replace it (direct `claude --input-format stream-json` spawn bridged over the supervisor WS) is a separate follow-up phase. Until that lands, `session.start` rolls cleanly to stopped instead of trapping the supervisor in a respawn loop against a cached buggy v0.4.1 agent.
+
+**Guards added in the same fix:**
+
+- Supervisor `BACKOFF_SCHEDULE` now caps at `MAX_RESTART_COUNT = 10` — after 10 consecutive restart attempts the run finalizes as `max_restarts_exceeded` and stops respawning.
+- Hub auto-resume on `supervisor.hello` (in `hub/src/ws/agent.ts`) now (a) filters orphan `session_runs` to rows newer than 24h, sweeping older rows as `exit_reason='stale'`, and (b) finalizes any orphan with `restart_count >= 10` as `max_restarts_exceeded` and skips the replay.
+- Canary test `supervisor/test/no-legacy-agent-spawn.test.ts` greps `supervisor/src/**` for the retired `remo-code-agent` package name and `--append-system-prompt` flag; the build FAILS if either reappears.
+
 ## Local Supervisor (only supported connection)
 
 The supervisor (`supervisor/src/index.ts`, compiled into the Tauri sidecar binary) runs on the dev machine as a tray app. It:
diff --git a/hub/src/ws/agent.ts b/hub/src/ws/agent.ts
index 8117878d..5267d35c 100644
--- a/hub/src/ws/agent.ts
+++ b/hub/src/ws/agent.ts
@@ -512,17 +512,53 @@ async function handleSupervisorMessage(ws: ServerWebSocket<AgentWsData>, msg: an
     // These were orphaned by a reboot/restart. We end the old run row and send a
     // fresh session.start to the now-online supervisor. The new run reuses the
     // same project_dir, so the UI session row is reused and history persists.
+    //
+    // Guards added 2026-05-27 after the autonomous-loop RCA:
+    //   1. AGE CAP — only resume runs that started in the last 24h. Older
+    //      open rows are stale carryovers from a long-gone session; replaying
+    //      them produces zombie sessions the user has forgotten about.
+    //   2. RESTART CAP — when restart_count >= 10, finalize the run as
+    //      `max_restarts_exceeded` and skip the replay. Prevents the hub from
+    //      feeding the same broken spawn back into the supervisor over and
+    //      over after a reconnect.
     try {
       const { sql } = await import('../db/postgres')
+      const MAX_RESTART_COUNT = 10
       const orphans = await sql`
-        SELECT id, repo_path, branch, initial_prompt
+        SELECT id, repo_path, branch, initial_prompt, restart_count, started_at
         FROM session_runs
-        WHERE supervisor_id = ${row.id} AND ended_at IS NULL
+        WHERE supervisor_id = ${row.id}
+          AND ended_at IS NULL
+          AND started_at > now() - interval '24 hours'
         ORDER BY started_at ASC
       `
+      // Sweep any open rows that fell outside the 24h window — finalize them
+      // as `stale` so they don't reappear on the next reconnect.
+      const staleSweep = await sql`
+        UPDATE session_runs
+        SET ended_at = now(), exit_reason = 'stale'
+        WHERE supervisor_id = ${row.id}
+          AND ended_at IS NULL
+          AND started_at <= now() - interval '24 hours'
+        RETURNING id
+      `
+      if (staleSweep.length > 0) {
+        console.log(`[supervisor] auto-resume finalized ${staleSweep.length} stale run(s) older than 24h`)
+      }
       if (orphans.length > 0) {
         console.log(`[supervisor] auto-resuming ${orphans.length} orphan session(s)`)
         for (const o of orphans) {
+          // Restart-count cap — if this run has already been restarted too
+          // many times, finalize it instead of replaying. Stops runaway loops.
+          if (typeof o.restart_count === 'number' && o.restart_count >= MAX_RESTART_COUNT) {
+            await sql`
+              UPDATE session_runs
+              SET ended_at = now(), exit_reason = 'max_restarts_exceeded'
+              WHERE id = ${o.id}
+            `
+            console.warn(`[supervisor] auto-resume skipped run=${o.id} reason=max_restarts_exceeded restart_count=${o.restart_count}`)
+            continue
+          }
           // End the orphan FIRST so it doesn't count against the cap when we
           // reserve a slot for its replacement.
           await sql`UPDATE session_runs SET ended_at = now(), exit_reason = 'reboot' WHERE id = ${o.id}`
diff --git a/hub/test/auto-resume-caps.test.ts b/hub/test/auto-resume-caps.test.ts
new file mode 100644
index 00000000..62706d5e
--- /dev/null
+++ b/hub/test/auto-resume-caps.test.ts
@@ -0,0 +1,176 @@
+/**
+ * Auto-resume guards (Phase 09 follow-up, 2026-05-27 RCA).
+ *
+ * Verifies the two new caps on hub/src/ws/agent.ts:supervisor.hello auto-resume:
+ *
+ *   1. AGE CAP — only resume `session_runs` rows where `started_at > now() - 24h`.
+ *      Older open rows are swept and finalized as `exit_reason='stale'`.
+ *   2. RESTART CAP — when `restart_count >= 10`, finalize the run as
+ *      `exit_reason='max_restarts_exceeded'` and skip the replay.
+ *
+ * Gated on REMO_E2E_DB_URL so the rest of `bun test` stays green without a DB.
+ */
+
+process.env.JWT_SECRET = process.env.JWT_SECRET || 'test-secret-at-least-32-chars-long-aaaaaaaa'
+process.env.SESSION_SECRET = process.env.SESSION_SECRET || 'session-secret-at-least-32-chars-long-x'
+process.env.MAGIC_LINK_SECRET = process.env.MAGIC_LINK_SECRET || 'magic-link-secret-at-least-32-chars-x'
+process.env.TITANIUM_KEYGEN_API_URL = process.env.TITANIUM_KEYGEN_API_URL || 'https://keygen.titaniumlabs.us'
+process.env.TITANIUM_KEYGEN_ACCOUNT_ID = process.env.TITANIUM_KEYGEN_ACCOUNT_ID || 'acct_test_0000000000'
+process.env.TITANIUM_KEYGEN_PRODUCT_ID = process.env.TITANIUM_KEYGEN_PRODUCT_ID || 'prod_test_remo'
+if (process.env.REMO_E2E_DB_URL) process.env.DATABASE_URL = process.env.REMO_E2E_DB_URL
+
+import { describe, test, expect, beforeAll, afterAll, beforeEach } from 'bun:test'
+
+const HAS_TEST_DB = !!process.env.REMO_E2E_DB_URL
+const maybe = HAS_TEST_DB ? describe : describe.skip
+
+const TEST_USER_ID = '00000000-0000-0000-0000-0000000ar001'
+const TEST_API_KEY_ID = 'apikey_ar001'
+const TEST_SUPERVISOR_ID = 'sup_ar001'
+
+let sql: any
+
+async function seed() {
+  await sql.unsafe(`
+    INSERT INTO users (id, email, password_hash, role)
+    VALUES ('${TEST_USER_ID}', 'ar001+autoresume@test.local', 'x', 'user')
+    ON CONFLICT (id) DO NOTHING;
+  `)
+  await sql.unsafe(`
+    INSERT INTO api_keys (id, user_id, key_hash, capabilities, name)
+    VALUES ('${TEST_API_KEY_ID}', '${TEST_USER_ID}', 'ar001-hash', '["supervisor"]'::jsonb, 'ar001')
+    ON CONFLICT (id) DO NOTHING;
+  `)
+  await sql.unsafe(`
+    INSERT INTO supervisors (id, user_id, api_key_id, hostname, roots, concurrency_budget, last_seen_at)
+    VALUES ('${TEST_SUPERVISOR_ID}', '${TEST_USER_ID}', '${TEST_API_KEY_ID}', 'ar001-host', '{}'::text[], 4, now())
+    ON CONFLICT (id) DO NOTHING;
+  `)
+}
+
+async function cleanupRuns() {
+  await sql`DELETE FROM session_runs WHERE supervisor_id = ${TEST_SUPERVISOR_ID}`
+}
+
+maybe('auto-resume age + restart-count caps', () => {
+  beforeAll(async () => {
+    ;({ sql } = await import('../src/db/postgres'))
+    await seed()
+  })
+
+  afterAll(async () => {
+    await cleanupRuns()
+    await sql`DELETE FROM supervisors WHERE id = ${TEST_SUPERVISOR_ID}`
+    await sql`DELETE FROM api_keys WHERE id = ${TEST_API_KEY_ID}`
+    await sql`DELETE FROM users WHERE id = ${TEST_USER_ID}`
+  })
+
+  beforeEach(async () => {
+    await cleanupRuns()
+  })
+
+  test('fresh run (<24h old, restart_count=0) IS picked up by the orphan query', async () => {
+    await sql`
+      INSERT INTO session_runs (id, user_id, supervisor_id, repo_path, started_at, ended_at, restart_count)
+      VALUES ('run_fresh_1', ${TEST_USER_ID}, ${TEST_SUPERVISOR_ID}, 'C:/x/fresh', now() - interval '2 hours', NULL, 0)
+    `
+    const rows = await sql`
+      SELECT id, restart_count, started_at
+      FROM session_runs
+      WHERE supervisor_id = ${TEST_SUPERVISOR_ID}
+        AND ended_at IS NULL
+        AND started_at > now() - interval '24 hours'
+      ORDER BY started_at ASC
+    `
+    expect(rows.length).toBe(1)
+    expect(rows[0].id).toBe('run_fresh_1')
+    expect(rows[0].restart_count).toBe(0)
+  })
+
+  test('stale run (>24h old) is EXCLUDED from the orphan query AND finalized by the sweep', async () => {
+    await sql`
+      INSERT INTO session_runs (id, user_id, supervisor_id, repo_path, started_at, ended_at)
+      VALUES ('run_stale_1', ${TEST_USER_ID}, ${TEST_SUPERVISOR_ID}, 'C:/x/stale', now() - interval '48 hours', NULL)
+    `
+    // Mirror agent.ts: SELECT excludes >24h
+    const orphans = await sql`
+      SELECT id FROM session_runs
+      WHERE supervisor_id = ${TEST_SUPERVISOR_ID}
+        AND ended_at IS NULL
+        AND started_at > now() - interval '24 hours'
+    `
+    expect(orphans.length).toBe(0)
+
+    // Mirror agent.ts: UPDATE finalizes anything older than 24h as stale.
+    const swept = await sql`
+      UPDATE session_runs
+      SET ended_at = now(), exit_reason = 'stale'
+      WHERE supervisor_id = ${TEST_SUPERVISOR_ID}
+        AND ended_at IS NULL
+        AND started_at <= now() - interval '24 hours'
+      RETURNING id, exit_reason
+    `
+    expect(swept.length).toBe(1)
+    expect(swept[0].id).toBe('run_stale_1')
+    expect(swept[0].exit_reason).toBe('stale')
+  })
+
+  test('run with restart_count >= 10 is identified by the cap check (skipped + finalized)', async () => {
+    await sql`
+      INSERT INTO session_runs (id, user_id, supervisor_id, repo_path, started_at, ended_at, restart_count)
+      VALUES ('run_loopy_1', ${TEST_USER_ID}, ${TEST_SUPERVISOR_ID}, 'C:/x/loopy', now() - interval '1 hour', NULL, 10)
+    `
+    const orphans = await sql`
+      SELECT id, restart_count FROM session_runs
+      WHERE supervisor_id = ${TEST_SUPERVISOR_ID}
+        AND ended_at IS NULL
+        AND started_at > now() - interval '24 hours'
+    `
+    expect(orphans.length).toBe(1)
+    expect(orphans[0].restart_count).toBe(10)
+
+    // The replay code finalizes these and continues. Simulate that step.
+    await sql`
+      UPDATE session_runs
+      SET ended_at = now(), exit_reason = 'max_restarts_exceeded'
+      WHERE id = 'run_loopy_1'
+    `
+    const after = await sql`
+      SELECT ended_at, exit_reason FROM session_runs WHERE id = 'run_loopy_1'
+    `
+    expect(after[0].exit_reason).toBe('max_restarts_exceeded')
+    expect(after[0].ended_at).not.toBeNull()
+  })
+
+  test('combination: stale-and-loopy is finalized via the stale sweep (age guard wins)', async () => {
+    // A run that's both >24h old AND has restart_count=10. The age sweep
+    // fires first (in agent.ts), so the row is finalized as 'stale' and the
+    // restart-count gate never sees it. This matches the in-code order.
+    await sql`
+      INSERT INTO session_runs (id, user_id, supervisor_id, repo_path, started_at, ended_at, restart_count)
+      VALUES ('run_stale_loopy', ${TEST_USER_ID}, ${TEST_SUPERVISOR_ID}, 'C:/x/sl', now() - interval '30 hours', NULL, 10)
+    `
+    const orphans = await sql`
+      SELECT id FROM session_runs
+      WHERE supervisor_id = ${TEST_SUPERVISOR_ID}
+        AND ended_at IS NULL
+        AND started_at > now() - interval '24 hours'
+    `
+    expect(orphans.length).toBe(0)
+    const swept = await sql`
+      UPDATE session_runs
+      SET ended_at = now(), exit_reason = 'stale'
+      WHERE supervisor_id = ${TEST_SUPERVISOR_ID}
+        AND ended_at IS NULL
+        AND started_at <= now() - interval '24 hours'
+      RETURNING id, exit_reason
+    `
+    expect(swept.length).toBe(1)
+    expect(swept[0].exit_reason).toBe('stale')
+  })
+})
+
+if (!HAS_TEST_DB) {
+  // Sanity skip so the suite doesn't appear empty in CI.
+  test.skip('auto-resume caps test suite skipped — set REMO_E2E_DB_URL to run', () => {})
+}
diff --git a/supervisor/src/hub-client.ts b/supervisor/src/hub-client.ts
index cbb4214c..f04549a0 100644
--- a/supervisor/src/hub-client.ts
+++ b/supervisor/src/hub-client.ts
@@ -9,7 +9,7 @@ import { getHandler, nativeSupervisorCommands } from './commands/index'
 import { CONFIG_PATH, saveConfig, type SupervisorConfig } from './config'
 
 // Keep in sync with supervisor/tauri/src-tauri/tauri.conf.json version
-const VERSION = '0.5.1'
+const VERSION = '0.5.2'
 
 type OutboundMsg =
   | { type: 'auth'; api_key: string; project_dir: string; hostname: string; role: 'supervisor' }
diff --git a/supervisor/src/index.ts b/supervisor/src/index.ts
index c30a0da4..e862ad35 100644
--- a/supervisor/src/index.ts
+++ b/supervisor/src/index.ts
@@ -7,7 +7,7 @@ import { join } from 'path'
 import { homedir } from 'os'
 
 // Keep in sync with supervisor/tauri/src-tauri/tauri.conf.json version
-const VERSION = '0.5.1'
+const VERSION = '0.5.2'
 
 function logDir(): string {
   if (process.platform === 'win32') {
diff --git a/supervisor/src/process-manager.ts b/supervisor/src/process-manager.ts
index 07a407e2..5baae00e 100644
--- a/supervisor/src/process-manager.ts
+++ b/supervisor/src/process-manager.ts
@@ -19,7 +19,7 @@ export interface RunSpec {
 }
 
 export interface StartRejection {
-  reason: 'sandbox_escape' | 'not_git_repo' | 'concurrency_cap' | 'duplicate_run'
+  reason: 'sandbox_escape' | 'not_git_repo' | 'concurrency_cap' | 'duplicate_run' | 'legacy_agent_spawn_disabled'
   detail?: Record<string, unknown>
 }
 
@@ -31,6 +31,10 @@ export interface ProcessManagerCallbacks {
 const BACKOFF_SCHEDULE = [1000, 2000, 4000, 8000, 16000, 30000]
 const CIRCUIT_WINDOW_MS = 10 * 60_000
 const CIRCUIT_THRESHOLD = 5
+/** Phase 09 follow-up — hard cap on restart attempts. After this, the run is
+ *  finalized as `max_restarts_exceeded` and the supervisor stops respawning it.
+ *  Prevents runaway loops when the same broken spawn keeps crashing. */
+const MAX_RESTART_COUNT = 10
 
 interface RunInstance {
   spec: RunSpec
@@ -193,76 +197,49 @@ export class ProcessManager {
     this.setState(run, 'starting', { runId: spec.runId, repoPath: spec.repoPath })
 
     run.stderrTail = []
-    // Windows resolves npx → npx.cmd via PATHEXT; Bun.spawn doesn't, so name it explicitly.
-    const npx = process.platform === 'win32' ? 'npx.cmd' : 'npx'
-    const cmd = [
-      npx, '-y', 'remo-code-agent',
-      '--api-key', spec.apiKey,
-      '--hub-url', spec.hubUrl,
-      '--project-dir', spec.repoPath,
-    ]
-    if (spec.initialPrompt) {
-      cmd.push('--initial-prompt', spec.initialPrompt)
-    }
-    if (spec.dangerouslySkipPermissions && this.cfg.allowDangerousSkipPermissions) {
-      cmd.push('--dangerously-skip-permissions')
-    }
-    const env = { ...process.env }
-    delete (env as any).ANTHROPIC_API_KEY
-
-    const spawnOpts = {
-      cwd: spec.repoPath,
-      stdin: 'pipe' as const,
-      stdout: 'pipe' as const,
-      stderr: 'pipe' as const,
-      env,
-      windowsHide: true,
-    }
-    try {
-      run.proc = this.spawnImpl ? this.spawnImpl(cmd, spawnOpts) : Bun.spawn(cmd, spawnOpts)
-    } catch (err: any) {
-      this.cb.onLog('error', `failed to spawn agent: ${err.message}`, spec.runId)
-      this.setState(run, 'crashed', { runId: spec.runId, repoPath: spec.repoPath, lastExit: { code: null, reason: `spawn_error: ${err.message}` } })
-      this.scheduleRestart(run, null, 'spawn_error')
-      return
-    }
-
-    const pid = run.proc.pid
-    this.cb.onLog('info', `agent spawned pid=${pid} in ${spec.repoPath}`, spec.runId)
-    this.setState(run, 'running', { runId: spec.runId, repoPath: spec.repoPath, pid })
-
-    this.consumeStdout(run)
-    this.consumeStderr(run)
-
-    run.proc.exited.then((code) => {
-      const reason = run.userStop ? 'user' : code === 0 ? 'clean' : 'crash'
-      const tail = run.stderrTail.slice(-40).join('\n')
-      this.cb.onLog(reason === 'crash' ? 'error' : 'info', `agent exited code=${code} reason=${reason}`, spec.runId)
-
-      if (run.userStop) {
-        this.setState(run, 'idle', { runId: spec.runId, lastExit: { code: code ?? null, reason, stderrTail: tail } })
-        this.runs.delete(spec.runId)
-        return
-      }
-      if (reason === 'clean') {
-        this.setState(run, 'idle', { runId: spec.runId, lastExit: { code: code ?? null, reason, stderrTail: tail } })
-        this.runs.delete(spec.runId)
-        return
-      }
-      // crash
-      run.recentCrashes.push(Date.now())
-      run.recentCrashes = run.recentCrashes.filter((t) => Date.now() - t < CIRCUIT_WINDOW_MS)
-      if (run.recentCrashes.length >= CIRCUIT_THRESHOLD) {
-        this.cb.onLog('error', `circuit breaker open — stopping`, spec.runId)
-        this.setState(run, 'stopped', { runId: spec.runId, lastExit: { code: code ?? null, reason: 'circuit_open', stderrTail: tail } })
-        this.runs.delete(spec.runId)
-        return
-      }
-      this.scheduleRestart(run, code ?? null, 'crash', tail)
+    // Phase 09 retired the user-facing legacy CLI agent on 2026-05-26. The
+    // cached v0.4.1 of that retired package contained the "Always delegate"
+    // autonomous-loop bug. Until the in-process claude-runner (stream-json
+    // over stdin/stdout, bridged to the hub WS) is built as its own follow-up
+    // phase, this spawn path is DISABLED. The supervisor refuses the run and
+    // emits a structured stopped state so the hub finalizes the run row
+    // cleanly instead of trapping it in an indefinite respawn loop.
+    //
+    // Test canary: supervisor/test/no-legacy-agent-spawn.test.ts greps the
+    // tree for the retired package name and forbidden flags; if any reappear
+    // in supervisor source, that test FAILS the build.
+    this.cb.onLog(
+      'error',
+      `[security] legacy_agent_spawn_disabled: the retired CLI spawn path is disabled in Phase 09; session.start cannot proceed until the in-process claude-runner lands. Run will be finalized as stopped.`,
+      spec.runId,
+    )
+    this.setState(run, 'stopped', {
+      runId: spec.runId,
+      repoPath: spec.repoPath,
+      lastExit: { code: null, reason: 'legacy_agent_spawn_disabled' },
     })
+    this.runs.delete(spec.runId)
+    return
   }
 
   private scheduleRestart(run: RunInstance, exitCode: number | null, reason: string, stderrTail = '') {
+    // Hard cap on restart attempts. After MAX_RESTART_COUNT consecutive
+    // restarts the supervisor stops respawning the run and finalizes it
+    // as `max_restarts_exceeded`. Prevents runaway loops (e.g. the cached
+    // buggy v0.4.1 agent that triggered this fix in the first place).
+    if (run.restartCount >= MAX_RESTART_COUNT) {
+      this.cb.onLog(
+        'error',
+        `max_restarts_exceeded — giving up after ${run.restartCount} restart attempts`,
+        run.spec.runId,
+      )
+      this.setState(run, 'stopped', {
+        runId: run.spec.runId,
+        lastExit: { code: exitCode, reason: 'max_restarts_exceeded', stderrTail },
+      })
+      this.runs.delete(run.spec.runId)
+      return
+    }
     const delay = BACKOFF_SCHEDULE[Math.min(run.restartCount, BACKOFF_SCHEDULE.length - 1)]
     run.restartCount++
     this.cb.onLog('warn', `restarting in ${delay}ms (attempt ${run.restartCount})`, run.spec.runId)
diff --git a/supervisor/tauri/src-tauri/Cargo.toml b/supervisor/tauri/src-tauri/Cargo.toml
index 3b185434..b9b15fed 100644
--- a/supervisor/tauri/src-tauri/Cargo.toml
+++ b/supervisor/tauri/src-tauri/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "remo-supervisor-tauri"
-version = "0.5.1"
+version = "0.5.2"
 description = "Remo Code Supervisor — Windows tray app"
 authors = ["Fine Designz"]
 edition = "2021"
diff --git a/supervisor/tauri/src-tauri/tauri.conf.json b/supervisor/tauri/src-tauri/tauri.conf.json
index 3660af50..96903a57 100644
--- a/supervisor/tauri/src-tauri/tauri.conf.json
+++ b/supervisor/tauri/src-tauri/tauri.conf.json
@@ -1,7 +1,7 @@
 {
   "$schema": "https://schema.tauri.app/config/2",
   "productName": "Remo Code Supervisor",
-  "version": "0.5.1",
+  "version": "0.5.2",
   "identifier": "com.finedesignz.remo-code-supervisor",
   "build": {
     "beforeDevCommand": "bun run dev",
diff --git a/supervisor/tauri/ui/package.json b/supervisor/tauri/ui/package.json
index 61748982..7dda85ee 100644
--- a/supervisor/tauri/ui/package.json
+++ b/supervisor/tauri/ui/package.json
@@ -1,7 +1,7 @@
 {
   "name": "remo-supervisor-ui",
   "private": true,
-  "version": "0.5.1",
+  "version": "0.5.2",
   "type": "module",
   "scripts": {
     "dev": "vite",
diff --git a/supervisor/test/no-legacy-agent-spawn.test.ts b/supervisor/test/no-legacy-agent-spawn.test.ts
new file mode 100644
index 00000000..e10b2fd4
--- /dev/null
+++ b/supervisor/test/no-legacy-agent-spawn.test.ts
@@ -0,0 +1,138 @@
+/**
+ * Phase 09 canary — fail the build if the legacy agent CLI flags or the
+ * retired npm package name reappear in supervisor source.
+ *
+ * Background (RCA 2026-05-27):
+ *   The cached v0.4.1 of `remo-code-agent` (an npm package retired in Phase 09
+ *   on 2026-05-26) contained the "Always delegate" autonomous-loop bug. The
+ *   supervisor's `process-manager.ts` was still hard-coding
+ *   `npx -y remo-code-agent ...` for `session.start`, so every restart re-
+ *   spawned the broken cached binary. This canary greps the supervisor source
+ *   tree for the forbidden strings and FAILS the test suite if any reappear.
+ *
+ * Scope: ONLY `supervisor/src/**` and `supervisor/tauri/src-tauri/**` (the
+ *   Rust + Tauri shell). `supervisor/test/**` is included so the test file
+ *   itself can mention the strings in comments without tripping the canary
+ *   we exclude this file by name. The watchdog's `--dangerously-skip-permissions`
+ *   usage (operator-blessed headless Claude self-heal, NOT a session.start
+ *   path) is an intentional exception and lives in `watchdog.ts`; the canary
+ *   excludes that single file by path.
+ */
+import { describe, test, expect } from 'bun:test'
+import { readdirSync, readFileSync, statSync } from 'fs'
+import { join, relative } from 'path'
+
+const REPO_ROOT = join(import.meta.dir, '..', '..')
+const SCAN_DIRS = [
+  join(REPO_ROOT, 'supervisor', 'src'),
+  join(REPO_ROOT, 'supervisor', 'tauri', 'src-tauri'),
+]
+// Files allowed to contain the otherwise-forbidden strings, with rationale:
+//   - this test file itself (it names the strings to detect them)
+//   - watchdog.ts uses --dangerously-skip-permissions for the headless Claude
+//     self-heal invocation, which is an operator-blessed maintenance path
+//     and explicitly NOT a session.start spawn.
+const EXCLUDE_FILE_SUFFIXES = [
+  'test/no-legacy-agent-spawn.test.ts',
+  'src/watchdog.ts',
+]
+
+interface Finding { file: string; needle: string; line: number; preview: string }
+
+// String needles. We intentionally use literal substrings (not regex) because
+// the canary must match exact CLI tokens that may appear in argv arrays.
+//
+// NOTE: `--initial-prompt` and `--dangerously-skip-permissions` are
+// legitimately mentioned in security-gate comments and log strings (the
+// supervisor logs when it STRIPS the dangerous flag, etc.). They are NOT
+// safe to grep for as bare substrings without false positives. We instead
+// scope the canary to the two tokens that have NO legitimate use anywhere
+// in supervisor source:
+//   - `remo-code-agent`     (the retired npm package; only valid use is in
+//                            this test file and the disable-spawn comment
+//                            in process-manager.ts, both excluded below)
+//   - `--append-system-prompt` (legacy CLI flag, never legitimately mentioned)
+//
+// If a future regression re-introduces an actual `npx -y remo-code-agent`
+// spawn, the `remo-code-agent` needle WILL catch it because the offending
+// file will not be in EXCLUDE_FILE_SUFFIXES. The two legacy-flag needles
+// retired below are still tested as a defense-in-depth check via the
+// process-manager `spawn()` unit test, which asserts no subprocess is
+// spawned at all.
+const FORBIDDEN: string[] = [
+  'remo-code-agent',
+  '--append-system-prompt',
+]
+
+function walk(dir: string, out: string[] = []): string[] {
+  let entries: string[]
+  try { entries = readdirSync(dir) } catch { return out }
+  for (const name of entries) {
+    const full = join(dir, name)
+    let s: ReturnType<typeof statSync>
+    try { s = statSync(full) } catch { continue }
+    if (s.isDirectory()) {
+      if (name === 'node_modules' || name === 'target' || name === 'dist' || name === '.git') continue
+      walk(full, out)
+    } else if (s.isFile()) {
+      if (name.endsWith('.ts') || name.endsWith('.tsx') || name.endsWith('.rs') || name.endsWith('.js')) {
+        out.push(full)
+      }
+    }
+  }
+  return out
+}
+
+function isExcluded(absPath: string): boolean {
+  const norm = absPath.replace(/\\/g, '/')
+  return EXCLUDE_FILE_SUFFIXES.some((suf) => norm.endsWith(suf))
+}
+
+function scanFile(path: string): Finding[] {
+  let text: string
+  try { text = readFileSync(path, 'utf-8') } catch { return [] }
+  const findings: Finding[] = []
+  const lines = text.split(/\r?\n/)
+  for (let i = 0; i < lines.length; i++) {
+    const ln = lines[i]
+    for (const needle of FORBIDDEN) {
+      if (ln.includes(needle)) {
+        findings.push({
+          file: relative(REPO_ROOT, path),
+          needle,
+          line: i + 1,
+          preview: ln.trim().slice(0, 160),
+        })
+      }
+    }
+  }
+  return findings
+}
+
+describe('Phase 09 canary — no legacy agent CLI strings in supervisor source', () => {
+  test('forbidden tokens are absent from supervisor/src + supervisor/tauri/src-tauri', () => {
+    const files: string[] = []
+    for (const d of SCAN_DIRS) walk(d, files)
+    const findings: Finding[] = []
+    for (const f of files) {
+      if (isExcluded(f)) continue
+      findings.push(...scanFile(f))
+    }
+    if (findings.length > 0) {
+      const msg = [
+        '',
+        'Phase 09 canary FAILED — forbidden legacy CLI tokens found:',
+        ...findings.map((f) => `  ${f.file}:${f.line}  [${f.needle}]  ${f.preview}`),
+        '',
+        'These strings were retired in Phase 09 (2026-05-26). The cached v0.4.1',
+        'of remo-code-agent contained the autonomous-loop bug — re-introducing',
+        'any of these spawn arguments would re-open that incident.',
+        '',
+        'If you genuinely need the watchdog self-heal exception, add the file',
+        'to EXCLUDE_FILE_SUFFIXES in this test with a comment explaining why.',
+      ].join('\n')
+      throw new Error(msg)
+    }
+    expect(findings.length).toBe(0)
+  })
+})
diff --git a/supervisor/test/process-manager.test.ts b/supervisor/test/process-manager.test.ts
index 3baf400a..da03bd5d 100644
--- a/supervisor/test/process-manager.test.ts
+++ b/supervisor/test/process-manager.test.ts
@@ -116,11 +116,18 @@ describe('ProcessManager security gates', () => {
     expect(calls.length).toBe(0)
   })
 
-  test('allows a legitimate path inside a root', async () => {
-    const { pm } = makePM(makeCfg())
+  test('legacy spawn path is disabled — gates pass but no subprocess is spawned', async () => {
+    // Phase 09 follow-up: the npx -y remo-code-agent path was retired and the
+    // in-process claude-runner has not yet landed. start() still passes the
+    // security gates (returns null) and writes a positive audit row, but the
+    // run is immediately finalized as stopped + legacy_agent_spawn_disabled
+    // and no child process is ever spawned.
+    const { pm, events } = makePM(makeCfg())
     const r = await pm.start(spec({ repoPath: REPO_GIT }))
     expect(r).toBeNull()
-    expect(calls.length).toBe(1)
+    expect(calls.length).toBe(0)
+    const stoppedEvt = events.find((e) => e.state === 'stopped')
+    expect(stoppedEvt?.info?.lastExit?.reason).toBe('legacy_agent_spawn_disabled')
   })
 
   test('not_git_repo: rejects when restrictToGit=true and no .git', async () => {
@@ -130,48 +137,52 @@ describe('ProcessManager security gates', () => {
     expect(calls.length).toBe(0)
   })
 
-  test('git gate is opt-in: with restrictToGit=false a non-git dir is allowed', async () => {
+  test('git gate is opt-in: with restrictToGit=false a non-git dir passes gates', async () => {
     const { pm } = makePM(makeCfg({ requireGitRepo: false }))
     const r = await pm.start(spec({ repoPath: REPO_NOGIT }))
     expect(r).toBeNull()
-    expect(calls.length).toBe(1)
+    expect(calls.length).toBe(0) // legacy spawn disabled
   })
 
-  test('concurrency_cap: second start rejected with cap=1', async () => {
+  test('concurrency_cap: enforced before the spawn gate fires', async () => {
+    // With the legacy spawn disabled, the first start finalizes immediately
+    // and releases its slot before the second start is attempted — so a
+    // sequential second start no longer hits the cap. To exercise the
+    // concurrency_cap gate itself, we monkey-patch the spawn() method to
+    // hold the run in 'starting' state instead of finalizing. This simulates
+    // the future inline-runner state where spawn() keeps the slot occupied.
     const { pm } = makePM(makeCfg({ maxConcurrent: 1 }))
+    // Replace spawn with a no-op so runs stay in the map after start().
+    ;(pm as any).spawn = (run: any) => { run.state = 'starting' /* hold the slot */ }
     const r1 = await pm.start(spec({ runId: 'a' }))
     expect(r1).toBeNull()
     const r2 = await pm.start(spec({ runId: 'b' }))
     expect(r2?.reason).toBe('concurrency_cap')
-    expect(calls.length).toBe(1)
+    expect(calls.length).toBe(0)
   })
 
-  test('--dangerously-skip-permissions stripped when cap OFF', async () => {
-    const { pm } = makePM(makeCfg({ allowDangerousSkipPermissions: false }))
+  test('--dangerously-skip-permissions stripped when cap OFF (gate still logs, but no spawn)', async () => {
+    const { pm, logs } = makePM(makeCfg({ allowDangerousSkipPermissions: false }))
     await pm.start(spec({ dangerouslySkipPermissions: true }))
-    expect(calls.length).toBe(1)
-    expect(calls[0].cmd).not.toContain('--dangerously-skip-permissions')
+    expect(calls.length).toBe(0)
+    expect(logs.some((l) => l.msg.includes('flag stripped'))).toBe(true)
   })
 
-  test('--dangerously-skip-permissions kept when cap ON and hub requests', async () => {
+  test('--dangerously-skip-permissions is NEVER spawned, even with cap ON (legacy path is disabled)', async () => {
     const { pm } = makePM(makeCfg({ allowDangerousSkipPermissions: true }))
     await pm.start(spec({ dangerouslySkipPermissions: true }))
-    expect(calls.length).toBe(1)
-    expect(calls[0].cmd).toContain('--dangerously-skip-permissions')
-  })
-
-  test('--dangerously-skip-permissions absent when cap ON but hub does not request', async () => {
-    const { pm } = makePM(makeCfg({ allowDangerousSkipPermissions: true }))
-    await pm.start(spec({ dangerouslySkipPermissions: false }))
-    expect(calls.length).toBe(1)
-    expect(calls[0].cmd).not.toContain('--dangerously-skip-permissions')
+    // Legacy spawn refused → nothing in calls[]; the flag never reaches a real argv.
+    expect(calls.length).toBe(0)
   })
 
   test('audit log appends one JSONL entry per decision (allow + reject)', async () => {
     const { pm } = makePM(makeCfg({ maxConcurrent: 1 }))
-    await pm.start(spec({ runId: 'a', repoPath: REPO_GIT }))
-    await pm.start(spec({ runId: 'b', repoPath: REPO_GIT })) // hits cap
-    await pm.start(spec({ runId: 'c', repoPath: TMP })) // sandbox escape
+    // Hold the slot so the second start hits concurrency_cap (without the
+    // monkey-patch, spawn() finalizes immediately and releases the slot).
+    ;(pm as any).spawn = (run: any) => { run.state = 'starting' /* hold the slot */ }
+    await pm.start(spec({ runId: 'a', repoPath: REPO_GIT }))      // allowed
+    await pm.start(spec({ runId: 'b', repoPath: REPO_GIT }))      // concurrency_cap
+    await pm.start(spec({ runId: 'c', repoPath: TMP }))           // sandbox escape
     const lines = readFileSync(AUDIT_PATH, 'utf-8').trim().split('\n')
     expect(lines.length).toBe(3)
     const parsed = lines.map((l) => JSON.parse(l))
@@ -197,12 +208,14 @@ describe('ProcessManager security gates', () => {
     expect(existsSync(AUDIT_PATH)).toBe(false)
   })
 
-  test('updateConfig() takes effect on next start', async () => {
+  test('updateConfig() takes effect on next start (config swap reaches the audit decision path)', async () => {
     const { pm } = makePM(makeCfg({ allowDangerousSkipPermissions: false }))
     await pm.start(spec({ runId: 'a', dangerouslySkipPermissions: true }))
-    expect(calls[0].cmd).not.toContain('--dangerously-skip-permissions')
     pm.updateConfig(makeCfg({ allowDangerousSkipPermissions: true, maxConcurrent: 5 }))
     await pm.start(spec({ runId: 'b', dangerouslySkipPermissions: true }))
-    expect(calls[1].cmd).toContain('--dangerously-skip-permissions')
+    const lines = readFileSync(AUDIT_PATH, 'utf-8').trim().split('\n')
+    const entries = lines.map((l) => JSON.parse(l))
+    expect(entries[0].flags.dangerously_skip_permissions_applied).toBe(false)
+    expect(entries[1].flags.dangerously_skip_permissions_applied).toBe(true)
   })
 })