Skip to content
Merged
1 change: 1 addition & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ Domains chain together: competitive research feeds marketing and business strate

## Recent Changes

- compact-mode-lazy-load-tool-content: Chat message compact mode with lazy-load tool content — `getMessages()` in `apps/api/src/durable-objects/project-data/messages.ts` gains `compact` parameter (default false); when compact=true, `parseChatMessageRowCompact()` in `row-schemas.ts` strips `tool_metadata.content` array and replaces it with `contentSize` (UTF-8 byte count), reducing RPC payload by 80-90% for tool-heavy sessions; new `getMessageToolContent()` method fetches individual message content on demand; chat session detail route (`GET /:sessionId`) in `apps/api/src/routes/chat.ts` defaults to compact=true (configurable via `CHAT_COMPACT_MODE_DEFAULT` env var); new `GET /:sessionId/messages/:messageId/tool-content` endpoint returns full content for lazy loading; `ToolCallItem` in `packages/acp-client` gains optional `contentSize`, `contentLoaded`, `messageId` fields; `ToolCallCard` component shows "Load content (X KB)" hint when compact, fetches content on expand via `onLoadContent` callback, caches in component state; `chatMessagesToConversationItems()` in `apps/web/src/components/project-message-view/types.ts` detects compact metadata and wires messageId for lazy loading; `getMessageToolContent()` API client in `apps/web/src/lib/api/sessions.ts`; configurable via CHAT_COMPACT_MODE_DEFAULT (default: true)
- harness-track-d-integration-design: SAM-native harness Track D design — `docs/architecture/agent-harness-integration.md` maps the Go harness, Cloudflare Sandbox SDK, and AI Gateway prototype evidence into production runtime choices for ProjectAgent/SamSession and future VM harness execution; shared model routing helper `filterModelsForAgentLoop()` plus `DEFAULT_SANDBOX_MODEL`, `DEFAULT_SANDBOX_AGENT_MAX_TURNS`, and `AGENT_LOOP_MIN_TOOL_CALL_SUPPORT` live in `packages/shared/src/constants/ai-services.ts`; sandbox result/config types live in `packages/shared/src/types/sandbox.ts`; the default Workers AI tool-loop model is Gemma 4 26B based on the 2026-05-05 harness evaluation, with Qwen 2.5 Coder retained as a fallback.
- ai-proxy-universal-tracking: Universal AI proxy passthrough for usage tracking — URL-path-based proxy routes (`apps/api/src/routes/ai-proxy-passthrough.ts`) at `/ai/proxy/:wstoken/anthropic/v1/messages`, `/ai/proxy/:wstoken/anthropic/v1/messages/count_tokens`, `/ai/proxy/:wstoken/openai/v1/chat/completions` embed workspace callback token in URL path, freeing auth headers for user's own API keys; runtime.ts (`apps/api/src/routes/workspaces/runtime.ts`) returns `inferenceConfig` when AI proxy is enabled and the credential can be forwarded upstream — two modes: `apiKeySource: 'user-credential'` with provider `anthropic-passthrough` or `openai-passthrough` (user has own API key, passthrough proxy for tracking only) and `apiKeySource: 'callback-token'` with provider `anthropic-proxy` or `openai-proxy` (platform proxy, existing behavior); Claude Code OAuth tokens are intentionally excluded from Anthropic passthrough and returned directly so the VM agent injects `CLAUDE_CODE_OAUTH_TOKEN`; base URLs use `{wstoken}` placeholder replaced at injection time by VM agent (`packages/vm-agent/internal/acp/session_host.go`); per-user RPM rate limiting and daily token budget applied; `cf-aig-metadata` injected for cost attribution via AI Gateway; user API keys forwarded via `x-api-key` (Anthropic) or `Authorization` (OpenAI) headers; configurable via AI_PROXY_ENABLED, AI_PROXY_RATE_LIMIT_RPM, AI_GATEWAY_ID, CF_ACCOUNT_ID
- user-ai-budget-controls: User-facing AI budget controls — `GET /api/usage/ai/budget` route (`apps/api/src/routes/usage.ts`) returns user's budget settings, daily usage, effective limits (3-tier resolution: user → env → constant), monthly cost from AI Gateway logs, utilization percentages, and exceeded flag; `PUT /api/usage/ai/budget` validates and saves custom budget settings to KV (`ai-budget-settings:{userId}`); `DELETE /api/usage/ai/budget` resets to platform defaults; budget service (`apps/api/src/services/ai-token-budget.ts`) provides `getUserBudgetSettings()`, `saveUserBudgetSettings()`, `deleteUserBudgetSettings()`, `validateBudgetUpdate()`, `resolveEffectiveLimits()`, `checkTokenBudget()`, `incrementTokenUsage()`; `BudgetSettingsSection` component in `SettingsComputeUsage.tsx` with `BudgetBar` utilization progress bars (color-coded: green < 80%, yellow 80-99%, red ≥ 100%), budget exceeded alert banner, Configure/Save/Cancel form with daily input/output token limits, monthly cost cap, alert threshold; shared types `UserAiBudgetSettings`, `UserAiBudgetResponse`, `UpdateAiBudgetRequest` in `packages/shared/src/types/ai-usage.ts`; configurable via AI_PROXY_DAILY_INPUT_TOKEN_LIMIT (default: 500000), AI_PROXY_DAILY_OUTPUT_TOKEN_LIMIT (default: 200000), AI_USAGE_MAX_DAILY_TOKEN_LIMIT (default: 10000000), AI_USAGE_MAX_MONTHLY_COST_CAP_USD (default: 1000), AI_USAGE_MIN_DAILY_TOKEN_LIMIT (default: 1000), AI_USAGE_MIN_MONTHLY_COST_CAP_USD (default: 0.01), AI_USAGE_BUDGET_TTL_SECONDS (default: 90000)
Expand Down
3 changes: 3 additions & 0 deletions apps/api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,9 @@ BASE_DOMAIN=workspaces.example.com
# SEGMENT_FETCH_TIMEOUT_MS=30000 # Timeout for Segment API fetch
# GA4_FETCH_TIMEOUT_MS=30000 # Timeout for GA4 API fetch

# Chat compact mode (message size optimization)
# CHAT_COMPACT_MODE_DEFAULT=true # Strip tool_metadata.content from chat session detail responses; set to false to return full tool content (default: true)

# File proxy configuration (chat file browser)
# FILE_PROXY_TIMEOUT_MS=15000 # Timeout for VM agent file proxy requests (ms)
# FILE_PROXY_MAX_RESPONSE_BYTES=2097152 # Max response size for text file proxy (2MB)
Expand Down
8 changes: 6 additions & 2 deletions apps/api/src/durable-objects/project-data/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,12 @@ export class ProjectData extends DurableObject<Env> {
return result ? this.addBaseDomain(result) : null;
}

async getMessages(sessionId: string, limit: number = 1000, before: number | null = null, roles?: string[]) {
return messages.getMessages(this.sql, sessionId, limit, before, roles);
/**
 * Read chat messages for a session by delegating to `messages.getMessages`
 * with this object's `sql` handle.
 *
 * `compact` defaults to false and is forwarded unchanged as the last argument.
 */
async getMessages(sessionId: string, limit: number = 1000, before: number | null = null, roles?: string[], compact: boolean = false) {
return messages.getMessages(this.sql, sessionId, limit, before, roles, compact);
}

/**
 * Return the tool content array for one message, or `null` when none is
 * available. Delegates to `messages.getMessageToolContent` using this
 * object's `sql` handle.
 */
async getMessageToolContent(sessionId: string, messageId: string): Promise<unknown[] | null> {
  const toolContent = messages.getMessageToolContent(this.sql, sessionId, messageId);
  return toolContent;
}

getMessageCount(sessionId: string, roles?: string[]): number {
Expand Down
40 changes: 38 additions & 2 deletions apps/api/src/durable-objects/project-data/messages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import { log } from '../../lib/logger';
import {
parseChatMessageRow,
parseChatMessageRowCompact,
parseCount,
parseMaxSeq,
parseMessageCount,
Expand Down Expand Up @@ -279,7 +280,8 @@ export function getMessages(
sessionId: string,
limit: number = 1000,
before: number | null = null,
roles?: string[]
roles?: string[],
compact: boolean = false
): { messages: Record<string, unknown>[]; hasMore: boolean } {
let query =
'SELECT id, session_id, role, content, tool_metadata, created_at, sequence FROM chat_messages WHERE session_id = ?';
Expand Down Expand Up @@ -329,8 +331,9 @@ export function getMessages(

const trimmedRows = candidateRows.slice(0, safeCount);

const rowParser = compact ? parseChatMessageRowCompact : parseChatMessageRow;
return {
messages: trimmedRows.reverse().map((row) => parseChatMessageRow(row)),
messages: trimmedRows.reverse().map((row) => rowParser(row)),
hasMore,
};
}
Expand Down Expand Up @@ -505,6 +508,39 @@ export function extractSnippet(content: string, query: string): string {
return (start > 0 ? '...' : '') + content.slice(start, end) + (end < content.length ? '...' : '');
}

/**
 * Look up a single chat message and return the `content` array stored inside
 * its `tool_metadata` JSON column. Backs the lazy-load path, which fetches
 * tool content for one message at a time.
 *
 * @param sql - SQL storage handle to run the lookup against.
 * @param sessionId - Session the message must belong to.
 * @param messageId - Id of the message to read.
 * @returns The `content` array, or `null` when no row matches, the column is
 *   not a string, the JSON cannot be parsed, or the parsed value has no
 *   array-valued `content` property.
 */
export function getMessageToolContent(
  sql: SqlStorage,
  sessionId: string,
  messageId: string
): unknown[] | null {
  const lookup = 'SELECT tool_metadata FROM chat_messages WHERE id = ? AND session_id = ?';
  // Bind order follows the placeholders: id first, then session_id.
  const rows = sql.exec(lookup, messageId, sessionId).toArray();

  const first = rows[0];
  if (!first) return null;

  const serialized = first.tool_metadata;
  if (typeof serialized !== 'string') return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(serialized);
  } catch {
    // Malformed metadata is reported the same way as missing content.
    return null;
  }

  if (decoded === null || typeof decoded !== 'object') return null;

  const { content } = decoded as { content?: unknown };
  return Array.isArray(content) ? content : null;
}

export function persistSystemMessage(
sql: SqlStorage,
sessionId: string,
Expand Down
48 changes: 48 additions & 0 deletions apps/api/src/durable-objects/project-data/row-schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,54 @@
};
}

/**
 * Parse a chat message row in compact mode.
 *
 * The row is parsed with `parseRow` against `ChatMessageRowSchema`, then its
 * `tool_metadata` JSON is passed through `stripToolMetadataContent`, which
 * replaces the `content` array with a `contentSize` byte count. This keeps
 * the RPC payload small for tool-heavy sessions.
 *
 * @param row - Raw row as returned by the SQL cursor.
 * @returns The message in camelCase form. `toolMetadata` is `null` when
 *   `safeParseJson` returns `null` for the column (presumably missing or
 *   unparseable metadata; confirm against `safeParseJson`).
 */
export function parseChatMessageRowCompact(row: unknown): {
  id: string;
  sessionId: string;
  role: string;
  content: string;
  toolMetadata: unknown;
  createdAt: number;
  sequence: number | null;
} {
  const r = parseRow(ChatMessageRowSchema, row, 'chat_message');
  const parsed = safeParseJson(r.tool_metadata);
  // Positive null check first: avoids the negated-condition ternary.
  const toolMetadata = parsed === null ? null : stripToolMetadataContent(parsed);

  return {
    id: r.id,
    sessionId: r.session_id,
    role: r.role,
    content: r.content,
    toolMetadata,
    createdAt: r.created_at,
    sequence: r.sequence,
  };
}

/** Shared encoder used to measure serialized content in UTF-8 bytes. */
const textEncoder = new TextEncoder();

/**
 * Drop the heavy `content` array from parsed tool metadata and record its
 * size instead.
 *
 * @param meta - Parsed `tool_metadata` value of any shape.
 * @returns `meta` itself (same reference) when it is not an object or has no
 *   non-empty `content` array. Otherwise a new object holding every other key
 *   plus `contentSize`, the UTF-8 byte length of the JSON-serialized content.
 *   The input object is never mutated.
 */
export function stripToolMetadataContent(meta: unknown): unknown {
  if (typeof meta !== 'object' || meta === null) return meta;

  const source = meta as Record<string, unknown>;
  const heavy = source.content;
  const hasContent = Array.isArray(heavy) && heavy.length > 0;
  if (!hasContent) return meta;

  // Measure bytes, not UTF-16 code units, so multi-byte text is counted correctly.
  const contentSize = textEncoder.encode(JSON.stringify(heavy)).byteLength;

  const kept = Object.entries(source).filter(([key]) => key !== 'content');
  return { ...Object.fromEntries(kept), contentSize };
}

/** Search result row (message + session join) */
const SearchResultRowSchema = v.object({
id: v.string(),
Expand Down
3 changes: 2 additions & 1 deletion apps/api/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -600,7 +600,8 @@ export interface Env {
SAM_SEARCH_LIMIT?: string; // Default search results (default: 10)
SAM_SEARCH_MAX_LIMIT?: string; // Max allowed search results (default: 50)
SAM_HISTORY_LOAD_LIMIT?: string; // Max messages loaded on page mount (default: 200)
CHAT_SESSION_MESSAGE_LIMIT?: string; // Max messages per chat session REST response (default: 3000)
CHAT_SESSION_MESSAGE_LIMIT?: string; // Max messages per chat session REST response (default: 500)
CHAT_COMPACT_MODE_DEFAULT?: string; // Whether compact mode strips tool content by default (default: true)
SAM_LLM_TIMEOUT_MS?: string; // LLM call timeout in ms (default: 120000)
SAM_DISPATCH_MAX_DESCRIPTION_LENGTH?: string; // Max task description length for SAM dispatch (default: 32000)
SAM_MESSAGE_MAX_LENGTH?: string; // Max message length for send_message_to_subtask (default: 32000)
Expand Down
45 changes: 41 additions & 4 deletions apps/api/src/routes/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* See: specs/018-project-first-architecture/tasks.md (T027)
*/
import type { ChatSessionTaskEmbed } from '@simple-agent-manager/shared';
import { DEFAULT_CHAT_SESSION_MESSAGE_LIMIT, isTaskExecutionStep, isTaskMode } from '@simple-agent-manager/shared';
import { DEFAULT_CHAT_COMPACT_MODE, DEFAULT_CHAT_SESSION_MESSAGE_LIMIT, isTaskExecutionStep, isTaskMode } from '@simple-agent-manager/shared';
import { and, eq, inArray } from 'drizzle-orm';
import { drizzle } from 'drizzle-orm/d1';
import type { Context } from 'hono';
Expand Down Expand Up @@ -221,14 +221,19 @@ chatRoutes.get('/:sessionId', async (c) => {
const beforeParam = c.req.query('before');
const before = beforeParam ? parseInt(beforeParam, 10) : null;

const compactDefault = (c.env.CHAT_COMPACT_MODE_DEFAULT ?? '').toLowerCase();
const compact = compactDefault === 'false' ? false : DEFAULT_CHAT_COMPACT_MODE;

let messagesResult: Awaited<ReturnType<typeof projectDataService.getMessages>>;
try {
messagesResult = await projectDataService.getMessages(
c.env,
projectId,
sessionId,
limit,
before
before,
undefined,
compact
);
} catch (err) {
return recordChatSessionLoadFailure(c, {
Expand Down Expand Up @@ -318,6 +323,36 @@ chatRoutes.get('/:sessionId', async (c) => {
});
});

/**
 * GET /api/projects/:projectId/sessions/:sessionId/messages/:messageId/tool-content
 *
 * Lazy-load endpoint for compact mode. The session detail route strips
 * tool_metadata.content from messages to shrink the RPC payload; this route
 * returns that array for one message so it can be fetched on demand when a
 * tool call card is expanded.
 *
 * Responds with `{ content }`. Throws `errors.notFound` when the lookup
 * yields `null`.
 */
chatRoutes.get('/:sessionId/messages/:messageId/tool-content', async (c) => {
  const userId = getUserId(c);
  const projectId = requireRouteParam(c, 'projectId');
  const sessionId = requireRouteParam(c, 'sessionId');
  const messageId = requireRouteParam(c, 'messageId');

  // Ownership is checked before any message data is read.
  await requireOwnedProject(drizzle(c.env.DATABASE, { schema }), projectId, userId);

  const content = await projectDataService.getMessageToolContent(
    c.env,
    projectId,
    sessionId,
    messageId
  );

  if (content !== null) {
    return c.json({ content });
  }

  throw errors.notFound('Message tool content');
});

/**
* POST /api/projects/:projectId/sessions/:sessionId/stop
* Stop a chat session.
Expand Down Expand Up @@ -456,13 +491,15 @@ chatRoutes.post('/:sessionId/summarize', async (c) => {
throw errors.notFound('Session not found');
}

// Fetch all messages for the session (up to 1000)
// Fetch all messages for the session (up to 1000) — compact=false to include full content for summarization
const { messages: allMessages } = await projectDataService.getMessages(
c.env,
projectId,
sessionId,
1000,
null
null,
undefined,
false
);

if (allMessages.length === 0) {
Expand Down
16 changes: 14 additions & 2 deletions apps/api/src/services/project-data.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// FILE SIZE EXCEPTION: DO proxy service — splitting creates import complexity without meaningful benefit. See .claude/rules/18-file-size-limits.md
/**
* Service layer for interacting with the per-project Durable Object.
*
Expand Down Expand Up @@ -153,10 +154,21 @@ export async function getMessages(
sessionId: string,
limit: number = 100,
before: number | null = null,
roles?: string[]
roles?: string[],
compact: boolean = false
): Promise<{ messages: Record<string, unknown>[]; hasMore: boolean }> {
const stub = await getStub(env, projectId);
return stub.getMessages(sessionId, limit, before, roles);
return stub.getMessages(sessionId, limit, before, roles, compact);
}

/**
 * Fetch the tool content array for a single message from the project's
 * Durable Object.
 *
 * Resolves the stub for `projectId` via `getStub` and forwards the call to
 * the stub's `getMessageToolContent`; the result is passed through as-is.
 */
export async function getMessageToolContent(
  env: Env,
  projectId: string,
  sessionId: string,
  messageId: string
): Promise<unknown[] | null> {
  const projectStub = await getStub(env, projectId);
  return projectStub.getMessageToolContent(sessionId, messageId);
}

/** Get total message count for a session, optionally filtered by roles. */
Expand Down
Loading
Loading