diff --git a/CLAUDE.md b/CLAUDE.md index 6c41784c3..2c5abc6a4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -246,6 +246,7 @@ Domains chain together: competitive research feeds marketing and business strate ## Recent Changes +- compact-mode-lazy-load-tool-content: Chat message compact mode with lazy-load tool content — `getMessages()` in `apps/api/src/durable-objects/project-data/messages.ts` gains `compact` parameter (default false); when compact=true, `parseChatMessageRowCompact()` in `row-schemas.ts` strips `tool_metadata.content` array and replaces it with `contentSize` (UTF-8 byte count), reducing RPC payload by 80-90% for tool-heavy sessions; new `getMessageToolContent()` method fetches individual message content on demand; chat session detail route (`GET /:sessionId`) in `apps/api/src/routes/chat.ts` defaults to compact=true (configurable via `CHAT_COMPACT_MODE_DEFAULT` env var); new `GET /:sessionId/messages/:messageId/tool-content` endpoint returns full content for lazy loading; `ToolCallItem` in `packages/acp-client` gains optional `contentSize`, `contentLoaded`, `messageId` fields; `ToolCallCard` component shows "Load content (X KB)" hint when compact, fetches content on expand via `onLoadContent` callback, caches in component state; `chatMessagesToConversationItems()` in `apps/web/src/components/project-message-view/types.ts` detects compact metadata and wires messageId for lazy loading; `getMessageToolContent()` API client in `apps/web/src/lib/api/sessions.ts`; configurable via CHAT_COMPACT_MODE_DEFAULT (default: true) - harness-track-d-integration-design: SAM-native harness Track D design — `docs/architecture/agent-harness-integration.md` maps the Go harness, Cloudflare Sandbox SDK, and AI Gateway prototype evidence into production runtime choices for ProjectAgent/SamSession and future VM harness execution; shared model routing helper `filterModelsForAgentLoop()` plus `DEFAULT_SANDBOX_MODEL`, `DEFAULT_SANDBOX_AGENT_MAX_TURNS`, and `AGENT_LOOP_MIN_TOOL_CALL_SUPPORT` live in `packages/shared/src/constants/ai-services.ts`; sandbox result/config types live in `packages/shared/src/types/sandbox.ts`; the default Workers AI tool-loop model is Gemma 4 26B based on the 2026-05-05 harness evaluation, with Qwen 2.5 Coder retained as a fallback. - ai-proxy-universal-tracking: Universal AI proxy passthrough for usage tracking — URL-path-based proxy routes (`apps/api/src/routes/ai-proxy-passthrough.ts`) at `/ai/proxy/:wstoken/anthropic/v1/messages`, `/ai/proxy/:wstoken/anthropic/v1/messages/count_tokens`, `/ai/proxy/:wstoken/openai/v1/chat/completions` embed workspace callback token in URL path, freeing auth headers for user's own API keys; runtime.ts (`apps/api/src/routes/workspaces/runtime.ts`) returns `inferenceConfig` when AI proxy is enabled and the credential can be forwarded upstream — two modes: `apiKeySource: 'user-credential'` with provider `anthropic-passthrough` or `openai-passthrough` (user has own API key, passthrough proxy for tracking only) and `apiKeySource: 'callback-token'` with provider `anthropic-proxy` or `openai-proxy` (platform proxy, existing behavior); Claude Code OAuth tokens are intentionally excluded from Anthropic passthrough and returned directly so the VM agent injects `CLAUDE_CODE_OAUTH_TOKEN`; base URLs use `{wstoken}` placeholder replaced at injection time by VM agent (`packages/vm-agent/internal/acp/session_host.go`); per-user RPM rate limiting and daily token budget applied; `cf-aig-metadata` injected for cost attribution via AI Gateway; user API keys forwarded via `x-api-key` (Anthropic) or `Authorization` (OpenAI) headers; configurable via AI_PROXY_ENABLED, AI_PROXY_RATE_LIMIT_RPM, AI_GATEWAY_ID, CF_ACCOUNT_ID - user-ai-budget-controls: User-facing AI budget controls — `GET /api/usage/ai/budget` route (`apps/api/src/routes/usage.ts`) returns user's budget settings, daily usage, effective limits (3-tier resolution: user → env → constant), monthly cost from AI Gateway logs, utilization percentages, and exceeded flag; `PUT /api/usage/ai/budget` validates and saves custom budget settings to KV (`ai-budget-settings:{userId}`); `DELETE /api/usage/ai/budget` resets to platform defaults; budget service (`apps/api/src/services/ai-token-budget.ts`) provides `getUserBudgetSettings()`, `saveUserBudgetSettings()`, `deleteUserBudgetSettings()`, `validateBudgetUpdate()`, `resolveEffectiveLimits()`, `checkTokenBudget()`, `incrementTokenUsage()`; `BudgetSettingsSection` component in `SettingsComputeUsage.tsx` with `BudgetBar` utilization progress bars (color-coded: green < 80%, yellow 80-99%, red ≥ 100%), budget exceeded alert banner, Configure/Save/Cancel form with daily input/output token limits, monthly cost cap, alert threshold; shared types `UserAiBudgetSettings`, `UserAiBudgetResponse`, `UpdateAiBudgetRequest` in `packages/shared/src/types/ai-usage.ts`; configurable via AI_PROXY_DAILY_INPUT_TOKEN_LIMIT (default: 500000), AI_PROXY_DAILY_OUTPUT_TOKEN_LIMIT (default: 200000), AI_USAGE_MAX_DAILY_TOKEN_LIMIT (default: 10000000), AI_USAGE_MAX_MONTHLY_COST_CAP_USD (default: 1000), AI_USAGE_MIN_DAILY_TOKEN_LIMIT (default: 1000), AI_USAGE_MIN_MONTHLY_COST_CAP_USD (default: 0.01), AI_USAGE_BUDGET_TTL_SECONDS (default: 90000) diff --git a/apps/api/.env.example b/apps/api/.env.example index c544baa69..81b4cee3d 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -381,6 +381,9 @@ BASE_DOMAIN=workspaces.example.com # SEGMENT_FETCH_TIMEOUT_MS=30000 # Timeout for Segment API fetch # GA4_FETCH_TIMEOUT_MS=30000 # Timeout for GA4 API fetch +# Chat compact mode (message size optimization) +# CHAT_COMPACT_MODE_DEFAULT=true # Strip tool_metadata.content on read path (default: true) + # File proxy configuration (chat file browser) # FILE_PROXY_TIMEOUT_MS=15000 # Timeout for VM agent file proxy requests (ms) # FILE_PROXY_MAX_RESPONSE_BYTES=2097152 # Max response size for text file proxy (2MB) diff --git a/apps/api/src/durable-objects/project-data/index.ts b/apps/api/src/durable-objects/project-data/index.ts index 73677bcee..6ae3c240a 100644 --- a/apps/api/src/durable-objects/project-data/index.ts +++ b/apps/api/src/durable-objects/project-data/index.ts @@ -149,8 +149,12 @@ export class ProjectData extends DurableObject { return result ? this.addBaseDomain(result) : null; } - async getMessages(sessionId: string, limit: number = 1000, before: number | null = null, roles?: string[]) { - return messages.getMessages(this.sql, sessionId, limit, before, roles); + async getMessages(sessionId: string, limit: number = 1000, before: number | null = null, roles?: string[], compact: boolean = false) { + return messages.getMessages(this.sql, sessionId, limit, before, roles, compact); + } + + async getMessageToolContent(sessionId: string, messageId: string): Promise { + return messages.getMessageToolContent(this.sql, sessionId, messageId); } getMessageCount(sessionId: string, roles?: string[]): number { diff --git a/apps/api/src/durable-objects/project-data/messages.ts b/apps/api/src/durable-objects/project-data/messages.ts index 6d18b3493..b873c4beb 100644 --- a/apps/api/src/durable-objects/project-data/messages.ts +++ b/apps/api/src/durable-objects/project-data/messages.ts @@ -4,6 +4,7 @@ import { log } from '../../lib/logger'; import { parseChatMessageRow, + parseChatMessageRowCompact, parseCount, parseMaxSeq, parseMessageCount, @@ -279,7 +280,8 @@ export function getMessages( sessionId: string, limit: number = 1000, before: number | null = null, - roles?: string[] + roles?: string[], + compact: boolean = false ): { messages: Record[]; hasMore: boolean } { let query = 'SELECT id, session_id, role, content, tool_metadata, created_at, sequence FROM chat_messages WHERE session_id = ?'; @@ -329,8 +331,9 @@ export function getMessages( const trimmedRows = candidateRows.slice(0, safeCount); + const rowParser = compact ? parseChatMessageRowCompact : parseChatMessageRow; return { - messages: trimmedRows.reverse().map((row) => parseChatMessageRow(row)), + messages: trimmedRows.reverse().map((row) => rowParser(row)), hasMore, }; } @@ -505,6 +508,39 @@ export function extractSnippet(content: string, query: string): string { return (start > 0 ? '...' : '') + content.slice(start, end) + (end < content.length ? '...' : ''); } +/** + * Fetch the tool_metadata.content array for a single message. + * Used by the lazy-load endpoint to fetch content on demand. + */ +export function getMessageToolContent( + sql: SqlStorage, + sessionId: string, + messageId: string +): unknown[] | null { + const row = sql + .exec( + 'SELECT tool_metadata FROM chat_messages WHERE id = ? AND session_id = ?', + messageId, + sessionId + ) + .toArray()[0]; + + if (!row) return null; + + const rawMeta = row.tool_metadata; + if (typeof rawMeta !== 'string') return null; + + try { + const parsed = JSON.parse(rawMeta); + if (parsed && typeof parsed === 'object' && Array.isArray(parsed.content)) { + return parsed.content as unknown[]; + } + return null; + } catch { + return null; + } +} + export function persistSystemMessage( sql: SqlStorage, sessionId: string, diff --git a/apps/api/src/durable-objects/project-data/row-schemas.ts b/apps/api/src/durable-objects/project-data/row-schemas.ts index 71d671c8f..39894ce38 100644 --- a/apps/api/src/durable-objects/project-data/row-schemas.ts +++ b/apps/api/src/durable-objects/project-data/row-schemas.ts @@ -262,6 +262,54 @@ export function parseChatMessageRow(row: unknown): { }; } +/** + * Parse a chat message row in compact mode: strips the `content` array from + * tool_metadata and replaces it with a `contentSize` byte count. + * This dramatically reduces RPC payload size for tool-heavy sessions. + */ +export function parseChatMessageRowCompact(row: unknown): { + id: string; + sessionId: string; + role: string; + content: string; + toolMetadata: unknown; + createdAt: number; + sequence: number | null; +} { + const r = parseRow(ChatMessageRowSchema, row, 'chat_message'); + const parsed = safeParseJson(r.tool_metadata); + const toolMetadata = parsed !== null ? stripToolMetadataContent(parsed) : null; + return { + id: r.id, + sessionId: r.session_id, + role: r.role, + content: r.content, + toolMetadata, + createdAt: r.created_at, + sequence: r.sequence, + }; +} + +/** + * Strip the heavy `content` array from parsed tool_metadata, replacing it + * with a `contentSize` field indicating the byte count of the stripped content. + * Preserves all other metadata fields (toolCallId, title, kind, status, locations). + */ +const textEncoder = new TextEncoder(); + +export function stripToolMetadataContent(meta: unknown): unknown { + if (!meta || typeof meta !== 'object') return meta; + const obj = meta as Record; + const contentArray = obj.content; + if (!Array.isArray(contentArray) || contentArray.length === 0) return meta; + + const contentJson = JSON.stringify(contentArray); + const contentSize = textEncoder.encode(contentJson).byteLength; + + const rest = Object.fromEntries(Object.entries(obj).filter(([k]) => k !== 'content')); + return { ...rest, contentSize }; +} + /** Search result row (message + session join) */ const SearchResultRowSchema = v.object({ id: v.string(), diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts index 29e30ace2..a65dbf04a 100644 --- a/apps/api/src/env.ts +++ b/apps/api/src/env.ts @@ -600,7 +600,8 @@ export interface Env { SAM_SEARCH_LIMIT?: string; // Default search results (default: 10) SAM_SEARCH_MAX_LIMIT?: string; // Max allowed search results (default: 50) SAM_HISTORY_LOAD_LIMIT?: string; // Max messages loaded on page mount (default: 200) - CHAT_SESSION_MESSAGE_LIMIT?: string; // Max messages per chat session REST response (default: 3000) + CHAT_SESSION_MESSAGE_LIMIT?: string; // Max messages per chat session REST response (default: 500) + CHAT_COMPACT_MODE_DEFAULT?: string; // Whether compact mode strips tool content by default (default: true) SAM_LLM_TIMEOUT_MS?: string; // LLM call timeout in ms (default: 120000) SAM_DISPATCH_MAX_DESCRIPTION_LENGTH?: string; // Max task description length for SAM dispatch (default: 32000) SAM_MESSAGE_MAX_LENGTH?: string; // Max message length for send_message_to_subtask (default: 32000) diff --git a/apps/api/src/routes/chat.ts b/apps/api/src/routes/chat.ts index 2d51ff596..b95640c2f 100644 --- a/apps/api/src/routes/chat.ts +++ b/apps/api/src/routes/chat.ts @@ -7,7 +7,7 @@ * See: specs/018-project-first-architecture/tasks.md (T027) */ import type { ChatSessionTaskEmbed } from '@simple-agent-manager/shared'; -import { DEFAULT_CHAT_SESSION_MESSAGE_LIMIT, isTaskExecutionStep, isTaskMode } from '@simple-agent-manager/shared'; +import { DEFAULT_CHAT_COMPACT_MODE, DEFAULT_CHAT_SESSION_MESSAGE_LIMIT, isTaskExecutionStep, isTaskMode } from '@simple-agent-manager/shared'; import { and, eq, inArray } from 'drizzle-orm'; import { drizzle } from 'drizzle-orm/d1'; import type { Context } from 'hono'; @@ -221,6 +221,9 @@ chatRoutes.get('/:sessionId', async (c) => { const beforeParam = c.req.query('before'); const before = beforeParam ? parseInt(beforeParam, 10) : null; + const compactDefault = (c.env.CHAT_COMPACT_MODE_DEFAULT ?? '').toLowerCase(); + const compact = compactDefault === 'false' ? false : DEFAULT_CHAT_COMPACT_MODE; + let messagesResult: Awaited>; try { messagesResult = await projectDataService.getMessages( @@ -228,7 +231,9 @@ chatRoutes.get('/:sessionId', async (c) => { projectId, sessionId, limit, - before + before, + undefined, + compact ); } catch (err) { return recordChatSessionLoadFailure(c, { @@ -318,6 +323,36 @@ chatRoutes.get('/:sessionId', async (c) => { }); }); +/** + * GET /api/projects/:projectId/sessions/:sessionId/messages/:messageId/tool-content + * Lazy-load the tool_metadata.content array for a single message. + * Used by compact mode: the session detail route strips tool content to reduce + * RPC payload size, and the frontend fetches content on demand when users expand + * individual tool call cards. + */ +chatRoutes.get('/:sessionId/messages/:messageId/tool-content', async (c) => { + const userId = getUserId(c); + const projectId = requireRouteParam(c, 'projectId'); + const sessionId = requireRouteParam(c, 'sessionId'); + const messageId = requireRouteParam(c, 'messageId'); + const db = drizzle(c.env.DATABASE, { schema }); + + await requireOwnedProject(db, projectId, userId); + + const content = await projectDataService.getMessageToolContent( + c.env, + projectId, + sessionId, + messageId + ); + + if (content === null) { + throw errors.notFound('Message tool content'); + } + + return c.json({ content }); +}); + /** * POST /api/projects/:projectId/sessions/:sessionId/stop * Stop a chat session. @@ -456,13 +491,15 @@ chatRoutes.post('/:sessionId/summarize', async (c) => { throw errors.notFound('Session not found'); } - // Fetch all messages for the session (up to 1000) + // Fetch all messages for the session (up to 1000) — compact=false to include full content for summarization const { messages: allMessages } = await projectDataService.getMessages( c.env, projectId, sessionId, 1000, - null + null, + undefined, + false ); if (allMessages.length === 0) { diff --git a/apps/api/src/services/project-data.ts b/apps/api/src/services/project-data.ts index f458a7682..447c63723 100644 --- a/apps/api/src/services/project-data.ts +++ b/apps/api/src/services/project-data.ts @@ -1,3 +1,4 @@ +// FILE SIZE EXCEPTION: DO proxy service — splitting creates import complexity without meaningful benefit. See .claude/rules/18-file-size-limits.md /** * Service layer for interacting with the per-project Durable Object. * @@ -153,10 +154,21 @@ export async function getMessages( sessionId: string, limit: number = 100, before: number | null = null, - roles?: string[] + roles?: string[], + compact: boolean = false ): Promise<{ messages: Record[]; hasMore: boolean }> { const stub = await getStub(env, projectId); - return stub.getMessages(sessionId, limit, before, roles); + return stub.getMessages(sessionId, limit, before, roles, compact); +} + +export async function getMessageToolContent( + env: Env, + projectId: string, + sessionId: string, + messageId: string +): Promise { + const stub = await getStub(env, projectId); + return stub.getMessageToolContent(sessionId, messageId); } /** Get total message count for a session, optionally filtered by roles. */ diff --git a/apps/api/tests/unit/durable-objects/compact-mode.test.ts b/apps/api/tests/unit/durable-objects/compact-mode.test.ts new file mode 100644 index 000000000..57018ba6d --- /dev/null +++ b/apps/api/tests/unit/durable-objects/compact-mode.test.ts @@ -0,0 +1,231 @@ +/** + * Unit tests for compact mode: stripToolMetadataContent and parseChatMessageRowCompact. + * + * Tests verify: + * - Content array is stripped and replaced with contentSize + * - Non-content metadata fields (toolCallId, title, kind, status, locations) are preserved + * - Empty/missing content arrays pass through unchanged + * - Non-object metadata passes through unchanged + * - contentSize accurately reflects UTF-8 byte count + * - parseChatMessageRowCompact produces compact output + */ +import { describe, expect, it } from 'vitest'; + +import { getMessageToolContent } from '../../../src/durable-objects/project-data/messages'; +import { + parseChatMessageRow, + parseChatMessageRowCompact, + stripToolMetadataContent, +} from '../../../src/durable-objects/project-data/row-schemas'; + +describe('stripToolMetadataContent', () => { + it('strips content array and adds contentSize', () => { + const meta = { + toolCallId: 'tc-123', + title: 'Read file', + kind: 'read', + status: 'completed', + locations: [{ path: '/foo/bar.ts', line: 42 }], + content: [ + { type: 'content', text: 'Hello world' }, + { type: 'diff', text: '--- a/file\n+++ b/file' }, + ], + }; + + const result = stripToolMetadataContent(meta) as Record; + + expect(result.toolCallId).toBe('tc-123'); + expect(result.title).toBe('Read file'); + expect(result.kind).toBe('read'); + expect(result.status).toBe('completed'); + expect(result.locations).toEqual([{ path: '/foo/bar.ts', line: 42 }]); + expect(result.content).toBeUndefined(); + expect(typeof result.contentSize).toBe('number'); + expect(result.contentSize).toBeGreaterThan(0); + }); + + it('preserves metadata when content is empty array', () => { + const meta = { toolCallId: 'tc-1', content: [] }; + const result = stripToolMetadataContent(meta); + expect(result).toEqual(meta); // empty array → no stripping + }); + + it('preserves metadata when content is missing', () => { + const meta = { toolCallId: 'tc-1', title: 'Write' }; + const result = stripToolMetadataContent(meta); + expect(result).toEqual(meta); + }); + + it('passes through null metadata', () => { + expect(stripToolMetadataContent(null)).toBeNull(); + }); + + it('passes through non-object metadata', () => { + expect(stripToolMetadataContent('string')).toBe('string'); + expect(stripToolMetadataContent(42)).toBe(42); + }); + + it('calculates contentSize as UTF-8 byte count', () => { + const content = [{ type: 'content', text: 'café' }]; // é is 2 bytes in UTF-8 + const meta = { content }; + const result = stripToolMetadataContent(meta) as Record; + + const expectedBytes = new TextEncoder().encode(JSON.stringify(content)).byteLength; + expect(result.contentSize).toBe(expectedBytes); + }); + + it('handles large content arrays', () => { + const largeContent = Array.from({ length: 100 }, (_, i) => ({ + type: 'content', + text: 'x'.repeat(1000), + index: i, + })); + const meta = { toolCallId: 'tc-big', content: largeContent }; + const result = stripToolMetadataContent(meta) as Record; + + expect(result.content).toBeUndefined(); + expect(result.contentSize).toBeGreaterThan(100_000); + expect(result.toolCallId).toBe('tc-big'); + }); +}); + +describe('parseChatMessageRowCompact', () => { + const makeRow = (toolMetadata: unknown) => ({ + id: 'msg-1', + session_id: 'sess-1', + role: 'tool', + content: 'tool output', + tool_metadata: toolMetadata ? JSON.stringify(toolMetadata) : null, + created_at: 1234567890, + sequence: 1, + }); + + it('strips content from tool_metadata in compact mode', () => { + const meta = { + toolCallId: 'tc-1', + title: 'Read', + content: [{ type: 'content', text: 'file contents here' }], + }; + const row = makeRow(meta); + + const compact = parseChatMessageRowCompact(row); + const tm = compact.toolMetadata as Record; + + expect(tm.toolCallId).toBe('tc-1'); + expect(tm.title).toBe('Read'); + expect(tm.content).toBeUndefined(); + expect(typeof tm.contentSize).toBe('number'); + expect(tm.contentSize).toBeGreaterThan(0); + }); + + it('preserves all other fields identically to parseChatMessageRow', () => { + const meta = { + toolCallId: 'tc-1', + title: 'Read', + content: [{ type: 'content', text: 'data' }], + }; + const row = makeRow(meta); + + const compact = parseChatMessageRowCompact(row); + const full = parseChatMessageRow(row); + + expect(compact.id).toBe(full.id); + expect(compact.sessionId).toBe(full.sessionId); + expect(compact.role).toBe(full.role); + expect(compact.content).toBe(full.content); + expect(compact.createdAt).toBe(full.createdAt); + expect(compact.sequence).toBe(full.sequence); + }); + + it('handles null tool_metadata', () => { + const row = makeRow(null); + const compact = parseChatMessageRowCompact(row); + expect(compact.toolMetadata).toBeNull(); + }); + + it('handles tool_metadata without content array', () => { + const meta = { toolCallId: 'tc-1', title: 'Read', status: 'completed' }; + const row = makeRow(meta); + const compact = parseChatMessageRowCompact(row); + const tm = compact.toolMetadata as Record; + + expect(tm.toolCallId).toBe('tc-1'); + expect(tm.content).toBeUndefined(); + expect(tm.contentSize).toBeUndefined(); + }); +}); + +describe('getMessageToolContent', () => { + function makeSql(rows: Record[]) { + return { + exec: () => ({ toArray: () => rows }), + } as unknown as import('@cloudflare/workers-types').SqlStorage; + } + + it('returns content array for a valid message with tool_metadata', () => { + const content = [{ type: 'content', text: 'hello' }]; + const sql = makeSql([{ tool_metadata: JSON.stringify({ toolCallId: 'tc-1', content }) }]); + const result = getMessageToolContent(sql, 'sess-1', 'msg-1'); + expect(result).toEqual(content); + }); + + it('returns null when message is not found', () => { + const sql = makeSql([]); + const result = getMessageToolContent(sql, 'sess-1', 'msg-missing'); + expect(result).toBeNull(); + }); + + it('returns null when tool_metadata is not a string', () => { + const sql = makeSql([{ tool_metadata: 42 }]); + const result = getMessageToolContent(sql, 'sess-1', 'msg-1'); + expect(result).toBeNull(); + }); + + it('returns null when tool_metadata has no content array', () => { + const sql = makeSql([{ tool_metadata: JSON.stringify({ toolCallId: 'tc-1', title: 'Read' }) }]); + const result = getMessageToolContent(sql, 'sess-1', 'msg-1'); + expect(result).toBeNull(); + }); + + it('returns null when tool_metadata is malformed JSON', () => { + const sql = makeSql([{ tool_metadata: '{bad json' }]); + const result = getMessageToolContent(sql, 'sess-1', 'msg-1'); + expect(result).toBeNull(); + }); + + it('returns null when tool_metadata is null string', () => { + const sql = makeSql([{ tool_metadata: null }]); + const result = getMessageToolContent(sql, 'sess-1', 'msg-1'); + expect(result).toBeNull(); + }); +}); + +describe('compact vs full mode payload size comparison', () => { + it('compact mode produces significantly smaller output for tool-heavy messages', () => { + const makeToolRow = (i: number) => ({ + id: `msg-${i}`, + session_id: 'sess-1', + role: 'tool', + content: '', + tool_metadata: JSON.stringify({ + toolCallId: `tc-${i}`, + title: `Tool ${i}`, + kind: 'read', + status: 'completed', + locations: [{ path: `/src/file-${i}.ts`, line: i }], + content: [{ type: 'content', text: 'x'.repeat(10_000) }], + }), + created_at: Date.now() + i, + sequence: i, + }); + + const rows = Array.from({ length: 20 }, (_, i) => makeToolRow(i)); + + const fullSize = JSON.stringify(rows.map((r) => parseChatMessageRow(r))).length; + const compactSize = JSON.stringify(rows.map((r) => parseChatMessageRowCompact(r))).length; + + // Compact mode should be at least 80% smaller for content-heavy rows + const reduction = 1 - compactSize / fullSize; + expect(reduction).toBeGreaterThan(0.8); + }); +}); diff --git a/apps/api/tests/unit/routes/chat-session-agent-routing.test.ts b/apps/api/tests/unit/routes/chat-session-agent-routing.test.ts index de91c62fb..48bfd3d37 100644 --- a/apps/api/tests/unit/routes/chat-session-agent-routing.test.ts +++ b/apps/api/tests/unit/routes/chat-session-agent-routing.test.ts @@ -20,6 +20,7 @@ vi.mock('drizzle-orm/d1', () => ({ vi.mock('@simple-agent-manager/shared', () => ({ DEFAULT_CHAT_SESSION_MESSAGE_LIMIT: 3000, + DEFAULT_CHAT_COMPACT_MODE: true, DEFAULT_WORKSPACE_PROFILE: 'full', isTaskExecutionStep: () => true, isTaskMode: (v: unknown) => v === 'task' || v === 'conversation', @@ -231,6 +232,8 @@ describe('chatRoutes agent session routing', () => { 'chat-1', 500, null, + undefined, + true, ); }); @@ -253,6 +256,8 @@ describe('chatRoutes agent session routing', () => { 'chat-1', 3000, null, + undefined, + true, ); }); diff --git a/apps/api/tests/unit/routes/mcp.test.ts b/apps/api/tests/unit/routes/mcp.test.ts index fd9207f68..2aed37d6e 100644 --- a/apps/api/tests/unit/routes/mcp.test.ts +++ b/apps/api/tests/unit/routes/mcp.test.ts @@ -811,6 +811,7 @@ describe('MCP Routes', () => { expect.any(Number), null, ['user', 'assistant'], + false, ); }); }); @@ -1932,6 +1933,7 @@ describe('MCP Routes', () => { expect.any(Number), null, ['user', 'assistant'], + false, ); }); @@ -1949,6 +1951,7 @@ describe('MCP Routes', () => { expect.any(Number), null, ['user', 'assistant'], + false, ); }); @@ -1967,6 +1970,7 @@ describe('MCP Routes', () => { expect.any(Number), null, ['user', 'assistant'], + false, ); }); }); diff --git a/apps/web/src/components/project-message-view/AcpConversationItemView.tsx b/apps/web/src/components/project-message-view/AcpConversationItemView.tsx index 15621ae8f..286ab0976 100644 --- a/apps/web/src/components/project-message-view/AcpConversationItemView.tsx +++ b/apps/web/src/components/project-message-view/AcpConversationItemView.tsx @@ -1,4 +1,4 @@ -import type { ConversationItem } from '@simple-agent-manager/acp-client'; +import type { ConversationItem, ToolCallContentItem } from '@simple-agent-manager/acp-client'; import { MessageBubble as AcpMessageBubble, PlanView, @@ -51,7 +51,11 @@ export function SystemMessageBubble({ text }: { text: string }) { } /** Renders a single ACP ConversationItem using the shared acp-client components. */ -export function AcpConversationItemView({ item, onFileClick }: { item: ConversationItem; onFileClick?: (path: string, line?: number | null) => void }) { +export function AcpConversationItemView({ item, onFileClick, onLoadToolContent }: { + item: ConversationItem; + onFileClick?: (path: string, line?: number | null) => void; + onLoadToolContent?: (messageId: string) => Promise; +}) { const globalAudio = useGlobalAudio(); const handlePlayAudio = item.kind === 'agent_message' @@ -78,7 +82,7 @@ export function AcpConversationItemView({ item, onFileClick }: { item: Conversat case 'thinking': return ; case 'tool_call': - return ; + return ; case 'plan': return ; case 'system_message': diff --git a/apps/web/src/components/project-message-view/index.tsx b/apps/web/src/components/project-message-view/index.tsx index ea6e63a39..d867c1116 100644 --- a/apps/web/src/components/project-message-view/index.tsx +++ b/apps/web/src/components/project-message-view/index.tsx @@ -1,10 +1,11 @@ -import type { PlanItem } from '@simple-agent-manager/acp-client'; -import { PlanModal } from '@simple-agent-manager/acp-client'; +import type { PlanItem, ToolCallContentItem } from '@simple-agent-manager/acp-client'; +import { mapToolCallContent, PlanModal } from '@simple-agent-manager/acp-client'; import { Button, Spinner } from '@simple-agent-manager/ui'; import { ChevronDown, ListChecks } from 'lucide-react'; -import { type FC, useRef, useState } from 'react'; +import { type FC, useCallback, useRef, useState } from 'react'; import { Virtuoso, type VirtuosoHandle } from 'react-virtuoso'; +import { getMessageToolContent } from '../../lib/api/sessions'; import { ChatFilePanel } from '../chat/ChatFilePanel'; import { TruncatedSummary } from '../chat/TruncatedSummary'; import { AcpConversationItemView } from './AcpConversationItemView'; @@ -46,6 +47,12 @@ export const ProjectMessageView: FC = ({ const lc = useSessionLifecycle(projectId, sessionId, isProvisioning, onSessionMutated); + /** Lazy-load tool content for a compact-mode tool call card. */ + const handleLoadToolContent = useCallback(async (messageId: string): Promise => { + const { content } = await getMessageToolContent(projectId, sessionId, messageId); + return (content as Array<{ type: string } & Record>).map((c) => mapToolCallContent(c)); + }, [projectId, sessionId]); + // Initial load — only show full spinner when no data exists yet if (lc.loading && lc.messages.length === 0 && !lc.session) { return ( @@ -181,7 +188,7 @@ export const ProjectMessageView: FC = ({ overscan={200} itemContent={(_index, item) => (
- +
)} components={{ diff --git a/apps/web/src/components/project-message-view/types.ts b/apps/web/src/components/project-message-view/types.ts index 1174d407d..f505809df 100644 --- a/apps/web/src/components/project-message-view/types.ts +++ b/apps/web/src/components/project-message-view/types.ts @@ -212,12 +212,17 @@ export function chatMessagesToConversationItems(msgs: ChatMessageResponse[]): Co // Use structured content from metadata when available; fall back to raw content field. // Content items are now stored as raw ACP JSON (same shape as real-time WebSocket), // so we pass them through mapToolCallContent — the same function the real-time path uses. + // In compact mode, content is stripped and contentSize is provided instead. const structuredContent = meta?.content as Array<{ type: string } & Record> | undefined; + const contentSize = typeof meta?.contentSize === 'number' ? meta.contentSize : undefined; + const isCompact = !structuredContent && contentSize !== undefined && contentSize > 0; let contentItems: Array<{ type: 'content' | 'diff' | 'terminal'; text?: string; data?: unknown }>; if (Array.isArray(structuredContent) && structuredContent.length > 0) { contentItems = structuredContent.map((c) => mapToolCallContent(c)); - } else { + } else if (!isCompact) { contentItems = isPlaceholderContent(msg.content) ? [] : [{ type: 'content' as const, text: msg.content }]; + } else { + contentItems = []; } // Deduplicate tool calls by toolCallId: merge updates into existing tool call @@ -244,6 +249,7 @@ export function chatMessagesToConversationItems(msgs: ChatMessageResponse[]): Co content: contentItems, locations: locations.map((l) => ({ path: l.path ?? '', line: l.line ?? null })), timestamp: msg.createdAt, + ...(isCompact ? { contentSize, contentLoaded: false, messageId: msg.id } : {}), }); if (toolCallId) { toolCallMap.set(toolCallId, idx); diff --git a/apps/web/src/lib/api/sessions.ts b/apps/web/src/lib/api/sessions.ts index fed324993..a79f51c70 100644 --- a/apps/web/src/lib/api/sessions.ts +++ b/apps/web/src/lib/api/sessions.ts @@ -102,6 +102,20 @@ export async function getChatSession( return request(endpoint, params.signal ? { signal: params.signal } : {}); } +/** + * Lazy-load the tool_metadata.content array for a single message. + * Used by compact mode when the user expands a tool call card. + */ +export async function getMessageToolContent( + projectId: string, + sessionId: string, + messageId: string +): Promise<{ content: unknown[] }> { + return request<{ content: unknown[] }>( + `/api/projects/${projectId}/sessions/${sessionId}/messages/${messageId}/tool-content` + ); +} + export async function createChatSession( projectId: string, data: { workspaceId?: string; topic?: string } = {} diff --git a/packages/acp-client/src/components/ToolCallCard.tsx b/packages/acp-client/src/components/ToolCallCard.tsx index 46c9fc96d..7454279cd 100644 --- a/packages/acp-client/src/components/ToolCallCard.tsx +++ b/packages/acp-client/src/components/ToolCallCard.tsx @@ -8,6 +8,8 @@ interface ToolCallCardProps { toolCall: ToolCallItem; /** Called when a file location is clicked. Receives the file path and optional line number. */ onFileClick?: (path: string, line?: number | null) => void; + /** Called to lazy-load tool content when expanding a compact tool call. Returns the content items. */ + onLoadContent?: (messageId: string) => Promise; } /** Status icon for tool call state */ @@ -40,9 +42,35 @@ function StatusIcon({ status }: { status: ToolCallItem['status'] }) { * Wrapped in React.memo to prevent re-renders when parent state changes * don't affect this component's props. */ -export const ToolCallCard = React.memo(function ToolCallCard({ toolCall, onFileClick }: ToolCallCardProps) { +export const ToolCallCard = React.memo(function ToolCallCard({ toolCall, onFileClick, onLoadContent }: ToolCallCardProps) { const [expanded, setExpanded] = useState(false); - const hasContent = toolCall.content.some(hasRenderableContent); + const [lazyContent, setLazyContent] = useState(null); + const [loading, setLoading] = useState(false); + const [loadFailed, setLoadFailed] = useState(false); + + const needsLazyLoad = toolCall.contentLoaded === false && !!toolCall.messageId && !!onLoadContent; + const hasContent = needsLazyLoad || toolCall.content.some(hasRenderableContent); + const displayContent = lazyContent ?? toolCall.content; + + const handleToggle = async () => { + if (!hasContent) return; + + if (!expanded && needsLazyLoad && !lazyContent && !loadFailed) { + setExpanded(true); + setLoading(true); + setLoadFailed(false); + try { + const content = await onLoadContent!(toolCall.messageId!); + setLazyContent(content); + } catch { + setLoadFailed(true); + } finally { + setLoading(false); + } + } else { + setExpanded(!expanded); + } + }; return (
@@ -50,8 +78,8 @@ export const ToolCallCard = React.memo(function ToolCallCard({ toolCall, onFileC
hasContent && setExpanded(!expanded)} - onKeyDown={(e) => { if ((e.key === 'Enter' || e.key === ' ') && hasContent) { e.preventDefault(); setExpanded(!expanded); } }} + onClick={handleToggle} + onKeyDown={(e) => { if ((e.key === 'Enter' || e.key === ' ') && hasContent) { e.preventDefault(); void handleToggle(); } }} aria-expanded={hasContent ? expanded : undefined} className={`w-full flex items-center gap-2 px-3 py-2 bg-gray-50 text-left ${hasContent ? 'cursor-pointer hover:bg-gray-100' : 'cursor-default'}`} > @@ -87,6 +115,11 @@ export const ToolCallCard = React.memo(function ToolCallCard({ toolCall, onFileC ) )}
+ {needsLazyLoad && !lazyContent && toolCall.contentSize != null && ( + + {formatBytes(toolCall.contentSize)} + + )} {hasContent && ( - {toolCall.content.map((content, idx) => ( - - ))} + {loading ? ( +
Loading content…
+ ) : loadFailed ? ( +
+ + Failed to load content. +
+ ) : ( + displayContent.map((content, idx) => ( + + )) + )}
)} @@ -188,3 +232,11 @@ function safeStringify(value: unknown): string { return ''; } } + +function formatBytes(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + const kb = bytes / 1024; + if (kb < 1024) return `${kb.toFixed(1)} KB`; + const mb = kb / 1024; + return `${mb.toFixed(1)} MB`; +} diff --git a/packages/acp-client/src/hooks/useAcpMessages.ts b/packages/acp-client/src/hooks/useAcpMessages.ts index 42677e68b..8a815aeb7 100644 --- a/packages/acp-client/src/hooks/useAcpMessages.ts +++ b/packages/acp-client/src/hooks/useAcpMessages.ts @@ -40,6 +40,12 @@ export interface ToolCallItem { content: ToolCallContentItem[]; locations: Array<{ path: string; line?: number | null }>; timestamp: number; + /** Byte size of stripped content (present when loaded in compact mode). */ + contentSize?: number; + /** Whether content has been lazy-loaded (false = needs fetch on expand). */ + contentLoaded?: boolean; + /** Message ID for lazy-loading content via the tool-content endpoint. */ + messageId?: string; } export interface ToolCallContentItem { diff --git a/packages/shared/src/constants/defaults.ts b/packages/shared/src/constants/defaults.ts index e57254ce5..605e2425e 100644 --- a/packages/shared/src/constants/defaults.ts +++ b/packages/shared/src/constants/defaults.ts @@ -61,6 +61,14 @@ export const DEFAULT_TASK_LIST_MAX_PAGE_SIZE = 200; */ export const DEFAULT_CHAT_SESSION_MESSAGE_LIMIT = 500; +/** + * Whether chat session message loads strip tool_metadata.content by default. + * When true, tool call content is lazy-loaded on demand when users expand + * individual tool calls, dramatically reducing RPC payload size. + * Override via CHAT_COMPACT_MODE_DEFAULT env var. + */ +export const DEFAULT_CHAT_COMPACT_MODE = true; + /** Default callback timeout for delegated task updates in milliseconds. */ export const DEFAULT_TASK_CALLBACK_TIMEOUT_MS = 10000; diff --git a/packages/shared/src/constants/index.ts b/packages/shared/src/constants/index.ts index 9c3829f48..f4bf6aae5 100644 --- a/packages/shared/src/constants/index.ts +++ b/packages/shared/src/constants/index.ts @@ -28,6 +28,7 @@ export { STATUS_COLORS,STATUS_LABELS } from './status'; // Defaults & Limits export { DEFAULT_BRANCH, + DEFAULT_CHAT_COMPACT_MODE, DEFAULT_CHAT_SESSION_MESSAGE_LIMIT, DEFAULT_DASHBOARD_INACTIVE_THRESHOLD_MS, DEFAULT_DASHBOARD_POLL_INTERVAL_MS, diff --git a/tasks/backlog/2026-05-06-compact-mode-tool-content-lazy-load.md b/tasks/archive/2026-05-06-compact-mode-tool-content-lazy-load.md similarity index 100% rename from tasks/backlog/2026-05-06-compact-mode-tool-content-lazy-load.md rename to tasks/archive/2026-05-06-compact-mode-tool-content-lazy-load.md