From 3259b002c3ad311b71ff6078d95734bc9968f1b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Wed, 6 May 2026 03:24:56 +0000 Subject: [PATCH 1/7] task: move stopped-workspace-auto-delete to active Co-Authored-By: Claude Opus 4.6 --- .../2026-05-06-stopped-workspace-auto-delete.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tasks/{backlog => active}/2026-05-06-stopped-workspace-auto-delete.md (100%) diff --git a/tasks/backlog/2026-05-06-stopped-workspace-auto-delete.md b/tasks/active/2026-05-06-stopped-workspace-auto-delete.md similarity index 100% rename from tasks/backlog/2026-05-06-stopped-workspace-auto-delete.md rename to tasks/active/2026-05-06-stopped-workspace-auto-delete.md From 537982c031042dc0beecf6bf0d804e15bdbd7bf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Wed, 6 May 2026 03:41:32 +0000 Subject: [PATCH 2/7] feat: auto-delete stopped workspaces after configurable TTL Extend NodeLifecycle DO to schedule workspace deletion 5 minutes after a workspace stops. This prevents disk exhaustion on nodes caused by accumulated stopped workspaces (Docker volumes with git repos, node_modules). - Add DEFAULT_WORKSPACE_STOPPED_TTL_MS (300000ms) to shared constants - Add scheduleWorkspaceDeletion/cancelWorkspaceDeletion to NodeLifecycle DO - Alarm handler processes expired deletions (VM agent DELETE + D1 update) - recalculateAlarm picks earliest of warm timeout and pending deletions - Schedule deletion from: stop route, cleanupTaskRun, cleanupOnFailure - Cancel deletion from: restart route (before TTL expires) - Cron safety-net in node-cleanup.ts for missed DO alarms - Configurable via WORKSPACE_STOPPED_TTL_MS env var Co-Authored-By: Claude Opus 4.6 --- apps/api/.env.example | 1 + .../api/src/durable-objects/node-lifecycle.ts | 232 +++++++++++++++- .../task-runner/state-machine.ts | 14 + apps/api/src/env.ts | 2 + apps/api/src/routes/workspaces/lifecycle.ts | 20 ++ apps/api/src/scheduled/node-cleanup.ts | 44 +++- apps/api/src/services/task-runner.ts | 16 ++ .../node-lifecycle-workspace-deletion.test.ts | 249 ++++++++++++++++++ apps/api/tests/unit/node-lifecycle.test.ts | 14 +- packages/shared/src/constants/index.ts | 1 + packages/shared/src/constants/node-pooling.ts | 7 + 11 files changed, 579 insertions(+), 21 deletions(-) create mode 100644 apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts diff --git a/apps/api/.env.example b/apps/api/.env.example index 776f34392..c544baa69 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -229,6 +229,7 @@ BASE_DOMAIN=workspaces.example.com # Workspace idle timeout (ProjectData DO) # WORKSPACE_IDLE_TIMEOUT_MS=7200000 # 2 hours — global default idle timeout before workspace is stopped (overridable per-project) +# WORKSPACE_STOPPED_TTL_MS=300000 # 5 minutes — auto-delete stopped workspaces after this TTL # Task agent configuration # DEFAULT_TASK_AGENT_TYPE=opencode # Agent used for autonomous task execution diff --git a/apps/api/src/durable-objects/node-lifecycle.ts b/apps/api/src/durable-objects/node-lifecycle.ts index 2caeda550..3a4fab02f 100644 --- a/apps/api/src/durable-objects/node-lifecycle.ts +++ b/apps/api/src/durable-objects/node-lifecycle.ts @@ -14,6 +14,11 @@ * alarm() → on `warm`: sets `destroying`, updates D1, cron handles teardown * → on `active`: no-op (was claimed between schedule and fire) * + * Workspace auto-deletion: + * scheduleWorkspaceDeletion(workspaceId, userId) → stores pending deletion, recalculates alarm + * cancelWorkspaceDeletion(workspaceId) → removes pending deletion, recalculates alarm + * alarm() → also processes expired workspace deletions (calls VM agent, updates D1) + * * Actual infrastructure destruction (Hetzner API, DNS) is handled by the * cron sweep, NOT by this DO — because user credentials are encrypted in D1 * and must be decrypted with CREDENTIAL_ENCRYPTION_KEY (or ENCRYPTION_KEY @@ -21,10 +26,11 @@ * * See: specs/021-task-chat-architecture/tasks.md (Phase 5) */ -import type { NodeLifecycleState,NodeLifecycleStatus } from '@simple-agent-manager/shared'; +import type { NodeLifecycleState, NodeLifecycleStatus } from '@simple-agent-manager/shared'; import { DEFAULT_NODE_LIFECYCLE_ALARM_RETRY_MS, DEFAULT_NODE_WARM_TIMEOUT_MS, + DEFAULT_WORKSPACE_STOPPED_TTL_MS, } from '@simple-agent-manager/shared'; import { DurableObject } from 'cloudflare:workers'; @@ -33,6 +39,7 @@ import { log } from '../lib/logger'; type NodeLifecycleEnv = { DATABASE: D1Database; NODE_WARM_TIMEOUT_MS?: string; + WORKSPACE_STOPPED_TTL_MS?: string; }; interface StoredState { @@ -45,6 +52,12 @@ interface StoredState { warmTimeoutOverrideMs?: number | null; } +interface PendingWorkspaceDeletion { + workspaceId: string; + userId: string; + deleteAt: number; +} + export class NodeLifecycle extends DurableObject { /** * Mark a node as idle (warm). Called after the last workspace on the node @@ -73,8 +86,8 @@ export class NodeLifecycle extends DurableObject { }; await this.ctx.storage.put('state', newState); - // Schedule (or reschedule) alarm - await this.ctx.storage.setAlarm(now + warmTimeout); + // Recalculate alarm considering both warm timeout and pending workspace deletions + await this.recalculateAlarm(now + warmTimeout); // Update D1 warm_since column await this.updateD1WarmSince(nodeId, new Date(now).toISOString()); @@ -84,7 +97,7 @@ export class NodeLifecycle extends DurableObject { /** * Mark a node as active. Called when a workspace starts on the node. - * Cancels any pending warm timeout alarm. + * Cancels any pending warm timeout alarm but preserves workspace deletion alarms. */ async markActive(): Promise { const state = await this.getStoredState(); @@ -97,8 +110,8 @@ export class NodeLifecycle extends DurableObject { state.warmSince = null; await this.ctx.storage.put('state', state); - // Cancel any pending alarm - await this.ctx.storage.deleteAlarm(); + // Recalculate alarm — pending workspace deletions still need to fire + await this.recalculateAlarm(null); // Clear D1 warm_since await this.updateD1WarmSince(state.nodeId, null); @@ -128,8 +141,8 @@ export class NodeLifecycle extends DurableObject { state.warmSince = null; await this.ctx.storage.put('state', state); - // Cancel alarm - await this.ctx.storage.deleteAlarm(); + // Recalculate alarm — pending workspace deletions still need to fire + await this.recalculateAlarm(null); // Clear D1 warm_since await this.updateD1WarmSince(state.nodeId, null); @@ -148,19 +161,65 @@ export class NodeLifecycle extends DurableObject { return this.toPublicState(state); } + // ========================================================================= + // Workspace auto-deletion scheduling + // ========================================================================= + + /** + * Schedule a stopped workspace for automatic deletion after the configured TTL. + * Called when a workspace transitions to 'stopped' status. + */ + async scheduleWorkspaceDeletion(workspaceId: string, userId: string): Promise { + const ttl = this.getWorkspaceStoppedTtlMs(); + const deleteAt = Date.now() + ttl; + + const entry: PendingWorkspaceDeletion = { workspaceId, userId, deleteAt }; + await this.ctx.storage.put(`ws-delete:${workspaceId}`, entry); + + log.info('node_lifecycle.workspace_deletion_scheduled', { + workspaceId, + userId, + deleteAt: new Date(deleteAt).toISOString(), + ttlMs: ttl, + }); + + await this.recalculateAlarm(await this.getWarmAlarmTime()); + } + + /** + * Cancel a pending workspace deletion. Called when a workspace is restarted + * before the TTL expires. + */ + async cancelWorkspaceDeletion(workspaceId: string): Promise { + await this.ctx.storage.delete(`ws-delete:${workspaceId}`); + + log.info('node_lifecycle.workspace_deletion_cancelled', { workspaceId }); + + await this.recalculateAlarm(await this.getWarmAlarmTime()); + } + + // ========================================================================= + // Alarm handler + // ========================================================================= + /** - * Alarm handler. Fires when the warm timeout expires. + * Alarm handler. Fires when either: + * 1. The warm timeout expires (node should be destroyed) + * 2. A workspace deletion is due * - * If the node is still warm, transitions to `destroying` and marks D1 - * for the cron sweep to handle actual infrastructure teardown. - * If the node was claimed between schedule and fire, this is a no-op. + * Processes expired workspace deletions first, then handles warm timeout. */ async alarm(): Promise { + // Process any expired workspace deletions + await this.processExpiredDeletions(); + const state = await this.getStoredState(); if (!state) return; // No-op if node was claimed (active) or already destroying if (state.status === 'active') { + // Still recalculate alarm for any remaining pending workspace deletions + await this.recalculateAlarm(null); return; } @@ -171,7 +230,18 @@ export class NodeLifecycle extends DurableObject { return; } - // status === 'warm' → transition to destroying + // status === 'warm' → check if warm timeout has actually expired + if (state.warmSince) { + const warmTimeout = state.warmTimeoutOverrideMs ?? this.getWarmTimeoutMs(); + const warmExpiry = state.warmSince + warmTimeout; + if (Date.now() < warmExpiry) { + // Warm timeout hasn't expired yet — alarm fired for workspace deletion only + await this.recalculateAlarm(warmExpiry); + return; + } + } + + // Warm timeout expired → transition to destroying state.status = 'destroying'; await this.ctx.storage.put('state', state); @@ -215,6 +285,15 @@ export class NodeLifecycle extends DurableObject { return DEFAULT_NODE_WARM_TIMEOUT_MS; } + private getWorkspaceStoppedTtlMs(): number { + const envValue = this.env.WORKSPACE_STOPPED_TTL_MS; + if (envValue) { + const parsed = parseInt(envValue, 10); + if (Number.isFinite(parsed) && parsed > 0) return parsed; + } + return DEFAULT_WORKSPACE_STOPPED_TTL_MS; + } + private toPublicState(state: StoredState): NodeLifecycleState { return { nodeId: state.nodeId, @@ -238,4 +317,131 @@ export class NodeLifecycle extends DurableObject { }); } } + + /** + * Get all pending workspace deletions from DO storage. + */ + private async getPendingDeletions(): Promise> { + return await this.ctx.storage.list({ prefix: 'ws-delete:' }); + } + + /** + * Process all workspace deletions whose deleteAt time has passed. + */ + private async processExpiredDeletions(): Promise { + const pending = await this.getPendingDeletions(); + const now = Date.now(); + + for (const [key, entry] of pending) { + if (entry.deleteAt > now) continue; + + try { + await this.deleteWorkspace(entry.workspaceId, entry.userId); + await this.ctx.storage.delete(key); + + log.info('node_lifecycle.workspace_auto_deleted', { + workspaceId: entry.workspaceId, + userId: entry.userId, + }); + } catch (err) { + log.error('node_lifecycle.workspace_deletion_failed', { + workspaceId: entry.workspaceId, + userId: entry.userId, + error: err instanceof Error ? err.message : String(err), + }); + // Leave the entry for retry on next alarm. Push deleteAt forward slightly + // to avoid tight retry loops. + entry.deleteAt = now + DEFAULT_NODE_LIFECYCLE_ALARM_RETRY_MS; + await this.ctx.storage.put(key, entry); + } + } + } + + /** + * Delete a workspace: call VM agent to remove Docker container + volume, + * then update D1 status to 'deleted'. + */ + private async deleteWorkspace(workspaceId: string, userId: string): Promise { + const state = await this.getStoredState(); + if (!state) return; + + // Look up node IP from D1 to call the VM agent + const nodeRow = await this.env.DATABASE.prepare( + `SELECT ip_address FROM nodes WHERE id = ?` + ).bind(state.nodeId).first<{ ip_address: string | null }>(); + + if (nodeRow?.ip_address) { + // Call VM agent DELETE endpoint to remove container + volume + try { + const protocol = 'https'; + const port = '8443'; + const url = `${protocol}://${nodeRow.ip_address}:${port}/workspaces/${workspaceId}`; + const resp = await fetch(url, { + method: 'DELETE', + headers: { 'X-User-ID': userId }, + signal: AbortSignal.timeout(30_000), + }); + + if (!resp.ok && resp.status !== 404) { + throw new Error(`VM agent DELETE returned ${resp.status}`); + } + } catch (err) { + // If the node is unreachable (already destroyed), log but don't fail + // The D1 status update below still marks the workspace as deleted + log.warn('node_lifecycle.workspace_delete_vm_agent_failed', { + workspaceId, + nodeId: state.nodeId, + error: err instanceof Error ? err.message : String(err), + }); + } + } + + // Update D1 workspace status to 'deleted' + const now = new Date().toISOString(); + await this.env.DATABASE.prepare( + `UPDATE workspaces SET status = 'deleted', updated_at = ? WHERE id = ? AND status = 'stopped'` + ).bind(now, workspaceId).run(); + + // Clean up any agent_sessions referencing this workspace (best-effort) + try { + await this.env.DATABASE.prepare( + `UPDATE agent_sessions SET status = 'completed', updated_at = ? WHERE workspace_id = ? AND status NOT IN ('completed', 'failed')` + ).bind(now, workspaceId).run(); + } catch { + // best-effort + } + } + + /** + * Get the warm alarm time if the node is in warm state. + */ + private async getWarmAlarmTime(): Promise { + const state = await this.getStoredState(); + if (!state || state.status !== 'warm' || !state.warmSince) return null; + const warmTimeout = state.warmTimeoutOverrideMs ?? this.getWarmTimeoutMs(); + return state.warmSince + warmTimeout; + } + + /** + * Recalculate and set the alarm to the earliest time needed: + * either the warm timeout expiry or the earliest pending workspace deletion. + * + * @param warmAlarmTime - The warm timeout expiry time, or null if not applicable + */ + private async recalculateAlarm(warmAlarmTime: number | null): Promise { + let earliest = warmAlarmTime; + + const pending = await this.getPendingDeletions(); + for (const [, entry] of pending) { + if (earliest === null || entry.deleteAt < earliest) { + earliest = entry.deleteAt; + } + } + + if (earliest !== null) { + await this.ctx.storage.setAlarm(earliest); + } else { + await this.ctx.storage.deleteAlarm(); + } + } } diff --git a/apps/api/src/durable-objects/task-runner/state-machine.ts b/apps/api/src/durable-objects/task-runner/state-machine.ts index 40054bbb9..5038525fd 100644 --- a/apps/api/src/durable-objects/task-runner/state-machine.ts +++ b/apps/api/src/durable-objects/task-runner/state-machine.ts @@ -350,6 +350,20 @@ export async function cleanupOnFailure( error: err instanceof Error ? err.message : String(err), }); } + + // Schedule automatic deletion after TTL (best-effort) + try { + const doId = rc.env.NODE_LIFECYCLE.idFromName(state.stepResults.nodeId); + const stub = rc.env.NODE_LIFECYCLE.get(doId); + await (stub as unknown as import('../node-lifecycle').NodeLifecycle) + .scheduleWorkspaceDeletion(state.stepResults.workspaceId, state.userId); + } catch (err) { + log.warn('task_runner_do.cleanup.schedule_deletion_failed', { + taskId: state.taskId, + workspaceId: state.stepResults.workspaceId, + error: err instanceof Error ? err.message : String(err), + }); + } } // Clean up auto-provisioned node. If a workspace exists, cleanupTaskRun diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts index 1b0b44cce..29e30ace2 100644 --- a/apps/api/src/env.ts +++ b/apps/api/src/env.ts @@ -139,6 +139,8 @@ export interface Env { ORPHANED_WORKSPACE_GRACE_PERIOD_MS?: string; // Workspace idle timeout (global default, overridable per-project) WORKSPACE_IDLE_TIMEOUT_MS?: string; + // Auto-delete stopped workspaces after this TTL (default: 300000 = 5 minutes) + WORKSPACE_STOPPED_TTL_MS?: string; // Task agent configuration DEFAULT_TASK_AGENT_TYPE?: string; // Built-in profile model overrides (defaults: claude-sonnet-4-5-20250929, claude-opus-4-6) diff --git a/apps/api/src/routes/workspaces/lifecycle.ts b/apps/api/src/routes/workspaces/lifecycle.ts index 11ff3fafb..82d486ebb 100644 --- a/apps/api/src/routes/workspaces/lifecycle.ts +++ b/apps/api/src/routes/workspaces/lifecycle.ts @@ -68,6 +68,16 @@ lifecycleRoutes.post('/:id/stop', requireAuth(), requireApproved(), async (c) => await stopComputeTracking(innerDb, workspace.id).catch((e) => { log.warn('workspace.compute_tracking_stop_failed', { workspaceId: workspace.id, error: String(e) }); }); + + // Schedule automatic deletion after TTL + try { + const doId = c.env.NODE_LIFECYCLE.idFromName(workspace.nodeId!); + const stub = c.env.NODE_LIFECYCLE.get(doId); + await (stub as unknown as import('../../durable-objects/node-lifecycle').NodeLifecycle) + .scheduleWorkspaceDeletion(workspace.id, userId); + } catch (e) { + log.warn('workspace.schedule_deletion_failed', { workspaceId: workspace.id, error: String(e) }); + } } catch (err) { await innerDb .update(schema.workspaces) @@ -122,6 +132,16 @@ lifecycleRoutes.post('/:id/restart', requireAuth(), requireApproved(), async (c) const node = await getOwnedNode(db, workspace.nodeId, userId); assertNodeOperational(node, 'restart workspace'); + // Cancel any pending auto-deletion before restarting + try { + const doId = c.env.NODE_LIFECYCLE.idFromName(workspace.nodeId!); + const stub = c.env.NODE_LIFECYCLE.get(doId); + await (stub as unknown as import('../../durable-objects/node-lifecycle').NodeLifecycle) + .cancelWorkspaceDeletion(workspace.id); + } catch (e) { + log.warn('workspace.cancel_deletion_failed', { workspaceId: workspace.id, error: String(e) }); + } + // Clear previous error state and boot logs before starting new provisioning await db .update(schema.workspaces) diff --git a/apps/api/src/scheduled/node-cleanup.ts b/apps/api/src/scheduled/node-cleanup.ts index 0d3672c70..409938c06 100644 --- a/apps/api/src/scheduled/node-cleanup.ts +++ b/apps/api/src/scheduled/node-cleanup.ts @@ -23,6 +23,7 @@ import { DEFAULT_MAX_AUTO_NODE_LIFETIME_MS, DEFAULT_NODE_WARM_GRACE_PERIOD_MS, DEFAULT_ORPHANED_WORKSPACE_GRACE_PERIOD_MS, + DEFAULT_WORKSPACE_STOPPED_TTL_MS, } from '@simple-agent-manager/shared'; import { eq } from 'drizzle-orm'; import { drizzle } from 'drizzle-orm/d1'; @@ -30,7 +31,7 @@ import { drizzle } from 'drizzle-orm/d1'; import * as schema from '../db/schema'; import type { Env } from '../env'; import { log } from '../lib/logger'; -import { stopWorkspaceOnNode } from '../services/node-agent'; +import { deleteWorkspaceOnNode, stopWorkspaceOnNode } from '../services/node-agent'; import { deleteNodeResources } from '../services/nodes'; import { persistError } from '../services/observability'; import * as projectDataService from '../services/project-data'; @@ -48,6 +49,7 @@ export interface NodeCleanupResult { lifetimeSkipped: number; orphanedWorkspacesFlagged: number; orphanedNodesFlagged: number; + stoppedWorkspacesDeleted: number; errors: number; } @@ -63,6 +65,7 @@ export async function runNodeCleanupSweep(env: Env): Promise lifetimeSkipped: 0, orphanedWorkspacesFlagged: 0, orphanedNodesFlagged: 0, + stoppedWorkspacesDeleted: 0, errors: 0, }; @@ -369,5 +372,44 @@ export async function runNodeCleanupSweep(env: Env): Promise result.orphanedNodesFlagged++; } + // 5. Safety-net: delete stopped workspaces past TTL that the DO alarm missed. + // This catches cases where the DO alarm failed to fire or wasn't scheduled. + const stoppedTtlMs = parseMs(env.WORKSPACE_STOPPED_TTL_MS, DEFAULT_WORKSPACE_STOPPED_TTL_MS); + // Add a 2x grace buffer so the DO alarm has time to fire first + const stoppedGraceThreshold = new Date(now.getTime() - stoppedTtlMs * 2).toISOString(); + const staleStoppedWorkspaces = await env.DATABASE.prepare( + `SELECT w.id, w.node_id, w.user_id + FROM workspaces w + WHERE w.status = 'stopped' + AND w.updated_at < ? + LIMIT 50` + ).bind(stoppedGraceThreshold).all<{ + id: string; + node_id: string | null; + user_id: string; + }>(); + + for (const ws of staleStoppedWorkspaces.results) { + try { + // Delete on VM agent (best-effort — node may be gone) + if (ws.node_id) { + await deleteWorkspaceOnNode(ws.node_id, ws.id, env, ws.user_id).catch((e) => { + log.warn('node_cleanup.stale_stopped_delete_on_node_failed', { workspaceId: ws.id, error: String(e) }); + }); + } + + // Mark as deleted in D1 + await db + .update(schema.workspaces) + .set({ status: 'deleted', updatedAt: new Date().toISOString() }) + .where(eq(schema.workspaces.id, ws.id)); + + result.stoppedWorkspacesDeleted++; + } catch (e) { + log.error('node_cleanup.stale_stopped_workspace_delete_failed', { workspaceId: ws.id, error: String(e) }); + result.errors++; + } + } + return result; } diff --git a/apps/api/src/services/task-runner.ts b/apps/api/src/services/task-runner.ts index cae3367ad..1a8a492e1 100644 --- a/apps/api/src/services/task-runner.ts +++ b/apps/api/src/services/task-runner.ts @@ -104,6 +104,22 @@ export async function cleanupTaskRun( }); } + // Schedule automatic deletion after TTL (best-effort) + if (workspace.nodeId && (workspace.status === 'running' || workspace.status === 'recovery' || workspace.status === 'stopped')) { + try { + const doId = env.NODE_LIFECYCLE.idFromName(workspace.nodeId); + const stub = env.NODE_LIFECYCLE.get(doId); + await (stub as unknown as import('../durable-objects/node-lifecycle').NodeLifecycle) + .scheduleWorkspaceDeletion(workspace.id, task.userId); + } catch (e) { + log.warn('task_run.cleanup.schedule_deletion_failed', { + taskId, + workspaceId: workspace.id, + error: e instanceof Error ? e.message : String(e), + }); + } + } + // If node was auto-provisioned for this task, check if it can be cleaned up if (task.autoProvisionedNodeId) { await cleanupAutoProvisionedNode( diff --git a/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts b/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts new file mode 100644 index 000000000..e21dc3113 --- /dev/null +++ b/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts @@ -0,0 +1,249 @@ +/** + * Source contract tests for NodeLifecycle workspace auto-deletion feature. + * + * Verifies the NodeLifecycle DO workspace deletion scheduling, + * cancellation, and alarm-based deletion logic. + */ +import { readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; + +import { describe, expect, it } from 'vitest'; + +describe('NodeLifecycle workspace auto-deletion source contract', () => { + const doFile = readFileSync(resolve(process.cwd(), 'src/durable-objects/node-lifecycle.ts'), 'utf8'); + const envFile = readFileSync(resolve(process.cwd(), 'src/env.ts'), 'utf8'); + const sharedConstantsFile = readFileSync( + resolve(process.cwd(), '../../packages/shared/src/constants/node-pooling.ts'), + 'utf8', + ); + const sharedIndexFile = readFileSync( + resolve(process.cwd(), '../../packages/shared/src/constants/index.ts'), + 'utf8', + ); + + describe('shared constant', () => { + it('defines DEFAULT_WORKSPACE_STOPPED_TTL_MS in node-pooling.ts', () => { + expect(sharedConstantsFile).toContain('export const DEFAULT_WORKSPACE_STOPPED_TTL_MS'); + expect(sharedConstantsFile).toContain('5 * 60 * 1000'); + }); + + it('exports DEFAULT_WORKSPACE_STOPPED_TTL_MS from constants barrel', () => { + expect(sharedIndexFile).toContain('DEFAULT_WORKSPACE_STOPPED_TTL_MS'); + }); + }); + + describe('env configuration', () => { + it('WORKSPACE_STOPPED_TTL_MS is defined in Env type', () => { + expect(envFile).toContain('WORKSPACE_STOPPED_TTL_MS?: string'); + }); + + it('NodeLifecycleEnv includes WORKSPACE_STOPPED_TTL_MS', () => { + expect(doFile).toContain('WORKSPACE_STOPPED_TTL_MS?: string'); + }); + }); + + describe('scheduleWorkspaceDeletion method', () => { + it('exists as a public async method', () => { + expect(doFile).toContain('async scheduleWorkspaceDeletion(workspaceId: string, userId: string)'); + }); + + it('reads configured TTL via getWorkspaceStoppedTtlMs', () => { + expect(doFile).toContain('this.getWorkspaceStoppedTtlMs()'); + }); + + it('stores entries with ws-delete: prefix in DO storage', () => { + expect(doFile).toContain('`ws-delete:${workspaceId}`'); + }); + + it('stores PendingWorkspaceDeletion with workspaceId, userId, deleteAt', () => { + expect(doFile).toContain('const entry: PendingWorkspaceDeletion = { workspaceId, userId, deleteAt }'); + }); + + it('recalculates alarm after scheduling', () => { + const method = doFile.slice(doFile.indexOf('async scheduleWorkspaceDeletion')); + expect(method).toContain('recalculateAlarm'); + }); + }); + + describe('cancelWorkspaceDeletion method', () => { + it('exists as a public async method', () => { + expect(doFile).toContain('async cancelWorkspaceDeletion(workspaceId: string)'); + }); + + it('deletes the ws-delete: entry from DO storage', () => { + expect(doFile).toContain("this.ctx.storage.delete(`ws-delete:${workspaceId}`)"); + }); + + it('recalculates alarm after cancellation', () => { + const method = doFile.slice(doFile.indexOf('async cancelWorkspaceDeletion')); + const methodEnd = method.indexOf('async ', 10); + const methodBody = method.slice(0, methodEnd > 0 ? methodEnd : undefined); + expect(methodBody).toContain('recalculateAlarm'); + }); + }); + + describe('alarm handler processes workspace deletions', () => { + it('calls processExpiredDeletions in alarm()', () => { + const alarmMethod = doFile.slice(doFile.indexOf('async alarm()')); + expect(alarmMethod).toContain('processExpiredDeletions'); + }); + + it('processExpiredDeletions checks deleteAt against now', () => { + expect(doFile).toContain('entry.deleteAt > now'); + }); + + it('calls deleteWorkspace for expired entries', () => { + expect(doFile).toContain('await this.deleteWorkspace(entry.workspaceId, entry.userId)'); + }); + + it('removes processed entries from storage', () => { + const processMethod = doFile.slice(doFile.indexOf('processExpiredDeletions')); + expect(processMethod).toContain('this.ctx.storage.delete(key)'); + }); + + it('retries failed deletions by pushing deleteAt forward', () => { + expect(doFile).toContain('entry.deleteAt = now + DEFAULT_NODE_LIFECYCLE_ALARM_RETRY_MS'); + }); + }); + + describe('deleteWorkspace implementation', () => { + it('looks up node IP from D1', () => { + expect(doFile).toContain('SELECT ip_address FROM nodes WHERE id = ?'); + }); + + it('calls VM agent DELETE endpoint', () => { + expect(doFile).toContain('/workspaces/${workspaceId}'); + expect(doFile).toContain("method: 'DELETE'"); + }); + + it('updates D1 workspace status to deleted', () => { + expect(doFile).toContain("UPDATE workspaces SET status = 'deleted'"); + }); + + it('only deletes workspaces that are still stopped', () => { + expect(doFile).toContain("AND status = 'stopped'"); + }); + + it('cleans up agent_sessions (best-effort)', () => { + expect(doFile).toContain('UPDATE agent_sessions SET status'); + }); + + it('handles unreachable VM agent gracefully (node may be gone)', () => { + expect(doFile).toContain('node_lifecycle.workspace_delete_vm_agent_failed'); + }); + }); + + describe('recalculateAlarm picks earliest time', () => { + it('considers warm alarm time', () => { + const method = doFile.slice(doFile.indexOf('private async recalculateAlarm')); + expect(method).toContain('warmAlarmTime'); + }); + + it('considers pending workspace deletions', () => { + const method = doFile.slice(doFile.indexOf('private async recalculateAlarm')); + expect(method).toContain('getPendingDeletions'); + }); + + it('picks the minimum of all times', () => { + const method = doFile.slice(doFile.indexOf('private async recalculateAlarm')); + expect(method).toContain('entry.deleteAt < earliest'); + }); + + it('deletes alarm when no pending events', () => { + const method = doFile.slice(doFile.indexOf('private async recalculateAlarm')); + expect(method).toContain('deleteAlarm()'); + }); + + it('sets alarm to earliest time', () => { + const method = doFile.slice(doFile.indexOf('private async recalculateAlarm')); + expect(method).toContain('setAlarm(earliest)'); + }); + }); + + describe('markActive preserves workspace deletion alarms', () => { + it('calls recalculateAlarm(null) instead of deleteAlarm()', () => { + const markActiveStart = doFile.indexOf('async markActive()'); + const markActiveEnd = doFile.indexOf('async tryClaim', markActiveStart); + const markActiveBody = doFile.slice(markActiveStart, markActiveEnd); + expect(markActiveBody).toContain('recalculateAlarm(null)'); + expect(markActiveBody).not.toContain('deleteAlarm()'); + }); + }); + + describe('tryClaim preserves workspace deletion alarms', () => { + it('calls recalculateAlarm(null) instead of deleteAlarm()', () => { + const tryClaimStart = doFile.indexOf('async tryClaim('); + const tryClaimEnd = doFile.indexOf('async getStatus', tryClaimStart); + const tryClaimBody = doFile.slice(tryClaimStart, tryClaimEnd); + expect(tryClaimBody).toContain('recalculateAlarm(null)'); + expect(tryClaimBody).not.toContain('deleteAlarm()'); + }); + }); + + describe('configurable workspace stopped TTL', () => { + it('uses WORKSPACE_STOPPED_TTL_MS env var', () => { + expect(doFile).toContain('WORKSPACE_STOPPED_TTL_MS'); + }); + + it('falls back to DEFAULT_WORKSPACE_STOPPED_TTL_MS', () => { + expect(doFile).toContain('DEFAULT_WORKSPACE_STOPPED_TTL_MS'); + }); + + it('imports the constant from shared', () => { + expect(doFile).toContain("DEFAULT_WORKSPACE_STOPPED_TTL_MS,"); + }); + }); + + describe('callers schedule deletion on workspace stop', () => { + const lifecycleFile = readFileSync(resolve(process.cwd(), 'src/routes/workspaces/lifecycle.ts'), 'utf8'); + const taskRunnerFile = readFileSync(resolve(process.cwd(), 'src/services/task-runner.ts'), 'utf8'); + const stateMachineFile = readFileSync( + resolve(process.cwd(), 'src/durable-objects/task-runner/state-machine.ts'), + 'utf8', + ); + + it('lifecycle stop route calls scheduleWorkspaceDeletion', () => { + expect(lifecycleFile).toContain('scheduleWorkspaceDeletion(workspace.id, userId)'); + }); + + it('lifecycle restart route calls cancelWorkspaceDeletion', () => { + expect(lifecycleFile).toContain('cancelWorkspaceDeletion(workspace.id)'); + }); + + it('cleanupTaskRun calls scheduleWorkspaceDeletion', () => { + expect(taskRunnerFile).toContain('scheduleWorkspaceDeletion(workspace.id, task.userId)'); + }); + + it('cleanupOnFailure calls scheduleWorkspaceDeletion', () => { + expect(stateMachineFile).toContain( + 'scheduleWorkspaceDeletion(state.stepResults.workspaceId, state.userId)', + ); + }); + }); + + describe('cron safety-net for stale stopped workspaces', () => { + const cronFile = readFileSync(resolve(process.cwd(), 'src/scheduled/node-cleanup.ts'), 'utf8'); + + it('imports DEFAULT_WORKSPACE_STOPPED_TTL_MS', () => { + expect(cronFile).toContain('DEFAULT_WORKSPACE_STOPPED_TTL_MS'); + }); + + it('includes stoppedWorkspacesDeleted in result', () => { + expect(cronFile).toContain('stoppedWorkspacesDeleted'); + }); + + it('queries for stopped workspaces past TTL', () => { + expect(cronFile).toContain("w.status = 'stopped'"); + expect(cronFile).toContain('stoppedGraceThreshold'); + }); + + it('calls deleteWorkspaceOnNode for stale stopped workspaces', () => { + expect(cronFile).toContain('deleteWorkspaceOnNode(ws.node_id, ws.id, env, ws.user_id)'); + }); + + it('marks stale stopped workspaces as deleted in D1', () => { + const step5Section = cronFile.slice(cronFile.indexOf('Safety-net')); + expect(step5Section).toContain("status: 'deleted'"); + }); + }); +}); diff --git a/apps/api/tests/unit/node-lifecycle.test.ts b/apps/api/tests/unit/node-lifecycle.test.ts index 7871ea1ab..3e7167aa2 100644 --- a/apps/api/tests/unit/node-lifecycle.test.ts +++ b/apps/api/tests/unit/node-lifecycle.test.ts @@ -47,8 +47,8 @@ describe('NodeLifecycle DO source contract', () => { expect(doFile).toContain('warmSince: now'); }); - it('schedules alarm at now + timeout', () => { - expect(doFile).toContain('setAlarm(now + warmTimeout)'); + it('recalculates alarm considering warm timeout and pending deletions', () => { + expect(doFile).toContain('recalculateAlarm(now + warmTimeout)'); }); it('updates D1 warm_since column', () => { @@ -69,8 +69,9 @@ describe('NodeLifecycle DO source contract', () => { expect(doFile).toContain('state.claimedByTask = null'); }); - it('cancels alarm via deleteAlarm', () => { - expect(doFile).toContain('this.ctx.storage.deleteAlarm()'); + it('recalculates alarm (preserves workspace deletion alarms)', () => { + const markActiveSection = doFile.slice(doFile.indexOf('async markActive()')); + expect(markActiveSection).toContain('recalculateAlarm(null)'); }); it('clears D1 warm_since', () => { @@ -96,10 +97,9 @@ describe('NodeLifecycle DO source contract', () => { expect(doFile).toContain('state.claimedByTask = taskId'); }); - it('cancels alarm on successful claim', () => { - // deleteAlarm is called inside tryClaim after claiming + it('recalculates alarm on successful claim (preserves workspace deletion alarms)', () => { const tryClaimSection = doFile.slice(doFile.indexOf('async tryClaim')); - expect(tryClaimSection).toContain('deleteAlarm()'); + expect(tryClaimSection).toContain('recalculateAlarm(null)'); }); }); diff --git a/packages/shared/src/constants/index.ts b/packages/shared/src/constants/index.ts index 4e673127c..9c3829f48 100644 --- a/packages/shared/src/constants/index.ts +++ b/packages/shared/src/constants/index.ts @@ -84,6 +84,7 @@ export { DEFAULT_NODE_WARM_TIMEOUT_MS, DEFAULT_ORPHANED_WORKSPACE_GRACE_PERIOD_MS, DEFAULT_WORKSPACE_IDLE_TIMEOUT_MS, + DEFAULT_WORKSPACE_STOPPED_TTL_MS, MAX_NODE_IDLE_TIMEOUT_MS, MAX_WORKSPACE_IDLE_TIMEOUT_MS, MIN_NODE_IDLE_TIMEOUT_MS, diff --git a/packages/shared/src/constants/node-pooling.ts b/packages/shared/src/constants/node-pooling.ts index a527ac5e3..d4560ade6 100644 --- a/packages/shared/src/constants/node-pooling.ts +++ b/packages/shared/src/constants/node-pooling.ts @@ -17,6 +17,13 @@ export const DEFAULT_ORPHANED_WORKSPACE_GRACE_PERIOD_MS = 10 * 60 * 1000; // 10 /** Default alarm retry delay (ms) when node destruction fails. */ export const DEFAULT_NODE_LIFECYCLE_ALARM_RETRY_MS = 60 * 1000; // 1 minute +// ============================================================================= +// Workspace Stopped Auto-Delete +// ============================================================================= + +/** Default TTL (ms) before a stopped workspace is automatically deleted. Override via WORKSPACE_STOPPED_TTL_MS env var. */ +export const DEFAULT_WORKSPACE_STOPPED_TTL_MS = 5 * 60 * 1000; // 5 minutes + // ============================================================================= // Workspace Idle Timeout (Compute Lifecycle Management) // ============================================================================= From e770ea951b95e91b433614e779176e7b4bf50506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Wed, 6 May 2026 03:54:14 +0000 Subject: [PATCH 3/7] fix(test): update existing tests for recalculateAlarm and new cleanup counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - warm-node-pooling integration test: setAlarm → recalculateAlarm, deleteAlarm → recalculateAlarm(null) - node-cleanup unit test: add stoppedWorkspacesDeleted to expected result, mock node-agent and project-data Co-Authored-By: Claude Opus 4.6 --- .../api/tests/integration/warm-node-pooling.test.ts | 4 ++-- apps/api/tests/unit/node-cleanup.test.ts | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/apps/api/tests/integration/warm-node-pooling.test.ts b/apps/api/tests/integration/warm-node-pooling.test.ts index bb647f092..9e429ba27 100644 --- a/apps/api/tests/integration/warm-node-pooling.test.ts +++ b/apps/api/tests/integration/warm-node-pooling.test.ts @@ -39,7 +39,7 @@ describe('warm node pooling lifecycle integration', () => { it('DO.markIdle sets warm status and schedules alarm', () => { expect(doFile).toContain("status: 'warm'"); - expect(doFile).toContain('setAlarm(now + warmTimeout)'); + expect(doFile).toContain('recalculateAlarm(now + warmTimeout)'); }); it('DO.markIdle updates D1 warm_since', () => { @@ -64,7 +64,7 @@ describe('warm node pooling lifecycle integration', () => { const tryClaimSection = doFile.slice(doFile.indexOf('async tryClaim')); expect(tryClaimSection).toContain("state.status = 'active'"); expect(tryClaimSection).toContain('state.claimedByTask = taskId'); - expect(tryClaimSection).toContain('deleteAlarm()'); + expect(tryClaimSection).toContain('recalculateAlarm(null)'); }); }); diff --git a/apps/api/tests/unit/node-cleanup.test.ts b/apps/api/tests/unit/node-cleanup.test.ts index 2c9b85544..d0b3a52c7 100644 --- a/apps/api/tests/unit/node-cleanup.test.ts +++ b/apps/api/tests/unit/node-cleanup.test.ts @@ -15,6 +15,18 @@ vi.mock('../../src/services/nodes', () => ({ deleteNodeResources: vi.fn().mockResolvedValue(undefined), })); +// Mock node-agent service +vi.mock('../../src/services/node-agent', () => ({ + deleteWorkspaceOnNode: vi.fn().mockResolvedValue(undefined), + stopWorkspaceOnNode: vi.fn().mockResolvedValue(undefined), +})); + +// Mock project-data service +vi.mock('../../src/services/project-data', () => ({ + stopSession: vi.fn().mockResolvedValue(undefined), + cleanupWorkspaceActivity: vi.fn().mockResolvedValue(undefined), +})); + // Mock persistError vi.mock('../../src/services/observability', () => ({ persistError: vi.fn().mockResolvedValue(undefined), @@ -228,6 +240,7 @@ describe('runNodeCleanupSweep', () => { lifetimeSkipped: 0, orphanedWorkspacesFlagged: 0, orphanedNodesFlagged: 0, + stoppedWorkspacesDeleted: 0, errors: 0, }); }); From 3a7f950ae250292288d1c2944b4a9e246e492822 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Wed, 6 May 2026 04:02:11 +0000 Subject: [PATCH 4/7] task: archive stopped-workspace-auto-delete task Co-Authored-By: Claude Opus 4.6 --- .../2026-05-06-stopped-workspace-auto-delete.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tasks/{active => archive}/2026-05-06-stopped-workspace-auto-delete.md (100%) diff --git a/tasks/active/2026-05-06-stopped-workspace-auto-delete.md b/tasks/archive/2026-05-06-stopped-workspace-auto-delete.md similarity index 100% rename from tasks/active/2026-05-06-stopped-workspace-auto-delete.md rename to tasks/archive/2026-05-06-stopped-workspace-auto-delete.md From 4fcb6ca022cc3e37b1560c3a0ebdc5a99fa22f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Wed, 6 May 2026 04:07:57 +0000 Subject: [PATCH 5/7] fix: address cloudflare-specialist review findings - [HIGH] Use deleteWorkspaceOnNode shared helper with proper JWT auth instead of raw IP fetch with X-User-ID header - [HIGH] Hoist getStoredState() before the deletion loop to avoid N redundant DO storage reads - [MEDIUM] Add status='stopped' guard to cron sweep D1 update (TOCTOU) - [MEDIUM] Use recalculateAlarm in destroying branch so pending workspace deletions are not delayed by retry window Co-Authored-By: Claude Opus 4.6 --- .../api/src/durable-objects/node-lifecycle.ts | 57 +++++++------------ apps/api/src/scheduled/node-cleanup.ts | 6 +- .../node-lifecycle-workspace-deletion.test.ts | 11 +--- 3 files changed, 27 insertions(+), 47 deletions(-) diff --git a/apps/api/src/durable-objects/node-lifecycle.ts b/apps/api/src/durable-objects/node-lifecycle.ts index 3a4fab02f..122407ee2 100644 --- a/apps/api/src/durable-objects/node-lifecycle.ts +++ b/apps/api/src/durable-objects/node-lifecycle.ts @@ -34,7 +34,9 @@ import { } from '@simple-agent-manager/shared'; import { DurableObject } from 'cloudflare:workers'; +import type { Env } from '../env'; import { log } from '../lib/logger'; +import { deleteWorkspaceOnNode } from '../services/node-agent'; type NodeLifecycleEnv = { DATABASE: D1Database; @@ -263,8 +265,8 @@ export class NodeLifecycle extends DurableObject { nodeId: state.nodeId, error: err instanceof Error ? err.message : String(err), }); - // Schedule retry - await this.ctx.storage.setAlarm(Date.now() + DEFAULT_NODE_LIFECYCLE_ALARM_RETRY_MS); + // Schedule retry (use recalculateAlarm to not delay pending workspace deletions) + await this.recalculateAlarm(Date.now() + DEFAULT_NODE_LIFECYCLE_ALARM_RETRY_MS); } } @@ -332,11 +334,15 @@ export class NodeLifecycle extends DurableObject { const pending = await this.getPendingDeletions(); const now = Date.now(); + // Load state once to get nodeId — avoids N storage reads in the loop + const state = await this.getStoredState(); + if (!state) return; + for (const [key, entry] of pending) { if (entry.deleteAt > now) continue; try { - await this.deleteWorkspace(entry.workspaceId, entry.userId); + await this.deleteWorkspace(state.nodeId, entry.workspaceId, entry.userId); await this.ctx.storage.delete(key); log.info('node_lifecycle.workspace_auto_deleted', { @@ -361,39 +367,18 @@ export class NodeLifecycle extends DurableObject { * Delete a workspace: call VM agent to remove Docker container + volume, * then update D1 status to 'deleted'. */ - private async deleteWorkspace(workspaceId: string, userId: string): Promise { - const state = await this.getStoredState(); - if (!state) return; - - // Look up node IP from D1 to call the VM agent - const nodeRow = await this.env.DATABASE.prepare( - `SELECT ip_address FROM nodes WHERE id = ?` - ).bind(state.nodeId).first<{ ip_address: string | null }>(); - - if (nodeRow?.ip_address) { - // Call VM agent DELETE endpoint to remove container + volume - try { - const protocol = 'https'; - const port = '8443'; - const url = `${protocol}://${nodeRow.ip_address}:${port}/workspaces/${workspaceId}`; - const resp = await fetch(url, { - method: 'DELETE', - headers: { 'X-User-ID': userId }, - signal: AbortSignal.timeout(30_000), - }); - - if (!resp.ok && resp.status !== 404) { - throw new Error(`VM agent DELETE returned ${resp.status}`); - } - } catch (err) { - // If the node is unreachable (already destroyed), log but don't fail - // The D1 status update below still marks the workspace as deleted - log.warn('node_lifecycle.workspace_delete_vm_agent_failed', { - workspaceId, - nodeId: state.nodeId, - error: err instanceof Error ? err.message : String(err), - }); - } + private async deleteWorkspace(nodeId: string, workspaceId: string, userId: string): Promise { + // Call VM agent DELETE endpoint via shared helper (handles JWT auth, proper URL routing) + try { + await deleteWorkspaceOnNode(nodeId, workspaceId, this.env as unknown as Env, userId); + } catch (err) { + // If the node is unreachable (already destroyed), log but don't fail + // The D1 status update below still marks the workspace as deleted + log.warn('node_lifecycle.workspace_delete_vm_agent_failed', { + workspaceId, + nodeId, + error: err instanceof Error ? err.message : String(err), + }); } // Update D1 workspace status to 'deleted' diff --git a/apps/api/src/scheduled/node-cleanup.ts b/apps/api/src/scheduled/node-cleanup.ts index 409938c06..c736978aa 100644 --- a/apps/api/src/scheduled/node-cleanup.ts +++ b/apps/api/src/scheduled/node-cleanup.ts @@ -25,7 +25,7 @@ import { DEFAULT_ORPHANED_WORKSPACE_GRACE_PERIOD_MS, DEFAULT_WORKSPACE_STOPPED_TTL_MS, } from '@simple-agent-manager/shared'; -import { eq } from 'drizzle-orm'; +import { and, eq } from 'drizzle-orm'; import { drizzle } from 'drizzle-orm/d1'; import * as schema from '../db/schema'; @@ -398,11 +398,11 @@ export async function runNodeCleanupSweep(env: Env): Promise }); } - // Mark as deleted in D1 + // Mark as deleted in D1 (status guard prevents TOCTOU race if workspace was restarted) await db .update(schema.workspaces) .set({ status: 'deleted', updatedAt: new Date().toISOString() }) - .where(eq(schema.workspaces.id, ws.id)); + .where(and(eq(schema.workspaces.id, ws.id), eq(schema.workspaces.status, 'stopped'))); result.stoppedWorkspacesDeleted++; } catch (e) { diff --git a/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts b/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts index e21dc3113..ed0df5142 100644 --- a/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts +++ b/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts @@ -93,7 +93,7 @@ describe('NodeLifecycle workspace auto-deletion source contract', () => { }); it('calls deleteWorkspace for expired entries', () => { - expect(doFile).toContain('await this.deleteWorkspace(entry.workspaceId, entry.userId)'); + expect(doFile).toContain('await this.deleteWorkspace(state.nodeId, entry.workspaceId, entry.userId)'); }); it('removes processed entries from storage', () => { @@ -107,13 +107,8 @@ describe('NodeLifecycle workspace auto-deletion source contract', () => { }); describe('deleteWorkspace implementation', () => { - it('looks up node IP from D1', () => { - expect(doFile).toContain('SELECT ip_address FROM nodes WHERE id = ?'); - }); - - it('calls VM agent DELETE endpoint', () => { - expect(doFile).toContain('/workspaces/${workspaceId}'); - expect(doFile).toContain("method: 'DELETE'"); + it('calls deleteWorkspaceOnNode via shared helper (proper JWT auth)', () => { + expect(doFile).toContain('deleteWorkspaceOnNode(nodeId, workspaceId, this.env as unknown as Env, userId)'); }); it('updates D1 workspace status to deleted', () => { From 100506f4c1a15f3d308d78446517b2b710bd6485 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Wed, 6 May 2026 04:10:52 +0000 Subject: [PATCH 6/7] feat: wire scheduleWorkspaceDeletion into idle cleanup path The processExpiredCleanups idle timeout path stops workspaces but wasn't scheduling automatic deletion. Workspaces stopped via idle timeout now get deletion scheduled via the NodeLifecycle DO, matching the lifecycle route and task-runner paths. Co-Authored-By: Claude Opus 4.6 --- .../src/durable-objects/project-data/index.ts | 19 ++++++++++++++++++- .../node-lifecycle-workspace-deletion.test.ts | 8 ++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/apps/api/src/durable-objects/project-data/index.ts b/apps/api/src/durable-objects/project-data/index.ts index 295f974b5..73677bcee 100644 --- a/apps/api/src/durable-objects/project-data/index.ts +++ b/apps/api/src/durable-objects/project-data/index.ts @@ -350,7 +350,24 @@ export class ProjectData extends DurableObject { (type, payload, sid) => this.broadcastEvent(type, payload, sid), () => this.scheduleSummarySync()); await idleCleanup.processExpiredCleanups(this.sql, this.env, (taskId) => idleCleanup.completeTaskInD1(this.env.DATABASE, taskId), - (workspaceId) => idleCleanup.stopWorkspaceInD1(this.env.DATABASE, workspaceId), + async (workspaceId) => { + await idleCleanup.stopWorkspaceInD1(this.env.DATABASE, workspaceId); + // Schedule automatic deletion after TTL (best-effort) + try { + const workerEnv = this.env as unknown as import('../../env').Env; + const wsRow = await workerEnv.DATABASE.prepare( + 'SELECT node_id, user_id FROM workspaces WHERE id = ?' + ).bind(workspaceId).first<{ node_id: string | null; user_id: string }>(); + if (wsRow?.node_id) { + const doId = workerEnv.NODE_LIFECYCLE.idFromName(wsRow.node_id); + const stub = workerEnv.NODE_LIFECYCLE.get(doId); + await (stub as unknown as import('../node-lifecycle').NodeLifecycle) + .scheduleWorkspaceDeletion(workspaceId, wsRow.user_id); + } + } catch { + // Best-effort — cron safety-net will catch it + } + }, (type, payload, sid) => this.broadcastEvent(type, payload, sid), () => this.scheduleSummarySync()); // Mailbox delivery sweep: expire stale messages and re-queue unacked ones diff --git a/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts b/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts index ed0df5142..4ca8e572c 100644 --- a/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts +++ b/apps/api/tests/unit/node-lifecycle-workspace-deletion.test.ts @@ -214,6 +214,14 @@ describe('NodeLifecycle workspace auto-deletion source contract', () => { 'scheduleWorkspaceDeletion(state.stepResults.workspaceId, state.userId)', ); }); + + it('idle cleanup processExpiredCleanups schedules deletion after stop', () => { + const projectDataIndexFile = readFileSync( + resolve(process.cwd(), 'src/durable-objects/project-data/index.ts'), + 'utf8', + ); + expect(projectDataIndexFile).toContain('scheduleWorkspaceDeletion(workspaceId, wsRow.user_id)'); + }); }); describe('cron safety-net for stale stopped workspaces', () => { From 5e08effdf153c591166fe2201c8fbda4ada50b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Titsworth-Morin?= Date: Wed, 6 May 2026 04:30:57 +0000 Subject: [PATCH 7/7] ci: trigger fresh CI run with updated PR body