diff --git a/apps/gateway-admin/components/chat/chat-input.tsx b/apps/gateway-admin/components/chat/chat-input.tsx index 35e312e78..cd17dc686 100644 --- a/apps/gateway-admin/components/chat/chat-input.tsx +++ b/apps/gateway-admin/components/chat/chat-input.tsx @@ -12,6 +12,7 @@ import { DropdownMenuTrigger, } from '@/components/ui/dropdown-menu' import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip' +import { ToggleGroup, ToggleGroupItem } from '@/components/ui/toggle-group' import type { ACPAgent, ACPModelOption } from './types' import { createLocalAttachmentDraft, @@ -19,6 +20,8 @@ import { revokeLocalAttachmentPreview, validateLocalFiles, } from '@/lib/chat/local-attachments' +import { groupModels } from '@/lib/chat/model-grouping' +import { nextNavIndex, useListKeyboard } from '@/lib/chat/use-list-keyboard' import type { AttachmentRef, LocalAttachmentDraft, PromptAttachmentRef } from '@/lib/fs/types' import { isInlineImageMime, previewWorkspaceFile } from '@/lib/fs/client' import { WorkspacePicker } from './workspace-picker' @@ -89,10 +92,15 @@ export function ChatInput({ const modelPickerRef = React.useRef(null) const modelTriggerRef = React.useRef(null) const modelOptionRefs = React.useRef>([]) - const [activeAgentIndex, setActiveAgentIndex] = React.useState(0) - const [activeModelIndex, setActiveModelIndex] = React.useState(0) + const agentNav = useListKeyboard({ count: agents.length }) + const modelNav = useListKeyboard({ count: modelOptions.length }) + const activeAgentIndex = agentNav.activeIndex + const setActiveAgentIndex = agentNav.setActiveIndex + const activeModelIndex = modelNav.activeIndex + const setActiveModelIndex = modelNav.setActiveIndex const pickerId = React.useId() const modelPickerId = React.useId() + const groupedModels = React.useMemo(() => groupModels(modelOptions), [modelOptions]) const value = draftText ?? uncontrolledValue const setValue = React.useCallback( @@ -255,6 +263,16 @@ export function ChatInput({ const selectedIndex = Math.max(modelOptions.findIndex((model) => model.id === selectedModel?.id), 0) setActiveModelIndex(selectedIndex) const frame = window.requestAnimationFrame(() => { + if (groupedModels.kind === 'grouped') { + // Flat-mode refs don't exist in grouped mode; pull focus onto the + // currently-selected toggle (or the first toggle as a fallback) so + // keyboard navigation immediately enters the picker. + const pickerEl = document.getElementById(modelPickerId) + const selectedToggle = pickerEl?.querySelector('[data-state="on"]') + const firstToggle = pickerEl?.querySelector('button') + ;(selectedToggle ?? firstToggle)?.focus() + return + } modelOptionRefs.current[selectedIndex]?.focus() }) @@ -270,7 +288,7 @@ export function ChatInput({ window.cancelAnimationFrame(frame) document.removeEventListener('mousedown', handlePointerDown) } - }, [modelPickerOpen, modelOptions, selectedModel?.id]) + }, [modelPickerOpen, modelOptions, selectedModel?.id, groupedModels.kind, modelPickerId]) const selectAgent = (agentId: string) => { onSelectAgent(agentId) @@ -300,26 +318,17 @@ export function ChatInput({ if (agents.length === 0) return - const moveTo = (nextIndex: number) => { - setActiveAgentIndex(nextIndex) - optionRefs.current[nextIndex]?.focus() + if ((event.key === 'Enter' || event.key === ' ') && agents[activeAgentIndex]) { + event.preventDefault() + selectAgent(agents[activeAgentIndex].id) + return } - if (event.key === 'ArrowDown') { - event.preventDefault() - moveTo((activeAgentIndex + 1) % agents.length) - } else if (event.key === 'ArrowUp') { + const next = nextNavIndex(activeAgentIndex, event.key, agents.length) + if (next !== null) { event.preventDefault() - moveTo((activeAgentIndex - 1 + agents.length) % agents.length) - } else if (event.key === 'Home') { - event.preventDefault() - moveTo(0) - } else if (event.key === 'End') { - event.preventDefault() - moveTo(agents.length - 1) - } else if ((event.key === 'Enter' || event.key === ' ') && agents[activeAgentIndex]) { - event.preventDefault() - selectAgent(agents[activeAgentIndex].id) + setActiveAgentIndex(next) + optionRefs.current[next]?.focus() } } @@ -363,26 +372,21 @@ export function ChatInput({ if (modelOptions.length === 0) return - const moveTo = (nextIndex: number) => { - setActiveModelIndex(nextIndex) - modelOptionRefs.current[nextIndex]?.focus() - } + // In grouped mode each row owns a ToggleGroup with its own keyboard nav — + // bow out so we don't double-handle arrow keys. + if (groupedModels.kind === 'grouped') return - if (event.key === 'ArrowDown') { - event.preventDefault() - moveTo((activeModelIndex + 1) % modelOptions.length) - } else if (event.key === 'ArrowUp') { - event.preventDefault() - moveTo((activeModelIndex - 1 + modelOptions.length) % modelOptions.length) - } else if (event.key === 'Home') { - event.preventDefault() - moveTo(0) - } else if (event.key === 'End') { - event.preventDefault() - moveTo(modelOptions.length - 1) - } else if ((event.key === 'Enter' || event.key === ' ') && modelOptions[activeModelIndex]) { + if ((event.key === 'Enter' || event.key === ' ') && modelOptions[activeModelIndex]) { event.preventDefault() selectModel(modelOptions[activeModelIndex].id) + return + } + + const next = nextNavIndex(activeModelIndex, event.key, modelOptions.length) + if (next !== null) { + event.preventDefault() + setActiveModelIndex(next) + modelOptionRefs.current[next]?.focus() } } @@ -516,9 +520,13 @@ export function ChatInput({ {modelPickerOpen && (
- {modelOptions.map((model, index) => { - const optionButton = ( - - ) - if (!model.description) return optionButton - return ( - - {optionButton} - - {model.description} - - - ) - })} + {groupedModels.kind === 'flat' ? ( + modelOptions.map((model, index) => { + const optionButton = ( + + ) + if (!model.description) return optionButton + return ( + + {optionButton} + + {model.description} + + + ) + }) + ) : ( + groupedModels.groups.map((group) => { + const selectedVariant = group.variants.find( + (v) => v.option.id === selectedModel?.id, + ) + return ( +
+ + {group.base} + + { + if (!effort) return + const variant = group.variants.find((v) => v.effort === effort) + if (variant) selectModel(variant.option.id) + }} + className="h-7 shrink-0" + > + {group.variants.map((variant) => { + const item = ( + + {variant.effort} + + ) + if (!variant.option.description) return item + return ( + + {item} + + {variant.option.description} + + + ) + })} + +
+ ) + }) + )}
)} diff --git a/apps/gateway-admin/components/chat/chat-shell.tsx b/apps/gateway-admin/components/chat/chat-shell.tsx index 8418816eb..cea8cb3f0 100644 --- a/apps/gateway-admin/components/chat/chat-shell.tsx +++ b/apps/gateway-admin/components/chat/chat-shell.tsx @@ -48,13 +48,38 @@ export function ChatShell() { const [maxTokens, setMaxTokens] = React.useState(8192) const [draftText, setDraftText] = React.useState('') const [attachmentsResetToken, setAttachmentsResetToken] = React.useState(0) - const { runs, selectedRun, selectedRunId, providerHealth, selectedAgent, selectedModel, agents, projects } = - useChatSessionData() - const { selectRun, createSession, sendPrompt, selectAgent, selectModel } = useChatSessionActions() + const { + visibleRuns, + hiddenRunCount, + includeHiddenRuns, + dominantModelId, + lastDispatchError, + selectedRun, + selectedRunId, + providerHealth, + selectedAgent, + selectedModel, + agents, + projects, + } = useChatSessionData() + const { + selectRun, + createSession, + sendPrompt, + selectAgent, + selectModel, + setIncludeHiddenRuns, + bulkCloseHiddenSessions, + } = useChatSessionActions() const { messages } = useChatSessionStream() const { connectionState } = useChatSessionConnection() const providerReady = Boolean(providerHealth?.ready) - const providerUnavailableMessage = providerReady ? null : providerHealth?.message?.trim() || null + // Union the actual provider-down message with the transient dispatch-error + // surface so the user sees a banner for ANY actionable failure — but the + // input itself only disables when the provider is genuinely down. + const providerUnavailableMessage = providerReady + ? lastDispatchError?.message ?? null + : providerHealth?.message?.trim() || lastDispatchError?.message || null const createRun = React.useCallback(async () => { try { @@ -228,11 +253,16 @@ export function ChatShell() { void createRun()} + hiddenRunCount={hiddenRunCount} + includeHiddenRuns={includeHiddenRuns} + onToggleIncludeHidden={() => setIncludeHiddenRuns((v) => !v)} + onBulkCloseHidden={bulkCloseHiddenSessions} + dominantModelId={dominantModelId} /> diff --git a/apps/gateway-admin/components/chat/session-sidebar.tsx b/apps/gateway-admin/components/chat/session-sidebar.tsx index 1910843ff..229cc4a5d 100644 --- a/apps/gateway-admin/components/chat/session-sidebar.tsx +++ b/apps/gateway-admin/components/chat/session-sidebar.tsx @@ -11,6 +11,7 @@ import { Collapsible, CollapsibleContent, CollapsibleTrigger } from '@/component import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from '@/components/ui/tooltip' import { ScrollArea } from '@/components/ui/scroll-area' import { formatTimeAgo } from './mock-data' +import { ConfirmDialog, type ConfirmState } from '@/components/marketplace/confirm-dialog' import type { ACPProject, ACPRun, ACPRunStatus } from './types' interface SessionSidebarProps { @@ -21,6 +22,16 @@ interface SessionSidebarProps { selectedProjectId: string | null onSelectRun: (runId: string, projectId: string) => void onNewRun: (projectId: string) => void + /** Number of failed/closed/cancelled runs hidden from `runs`. */ + hiddenRunCount?: number + /** Whether the toggle currently includes hidden runs in `runs`. */ + includeHiddenRuns?: boolean + /** Called when the user clicks the show/hide toggle chip. */ + onToggleIncludeHidden?: () => void + /** Called when the user confirms the bulk cleanup. Should resolve to closed/failed counts. */ + onBulkCloseHidden?: () => Promise<{ closedCount: number; failedCount: number }> + /** modelId shared by more than half of the visible runs — used to suppress redundant badges. */ + dominantModelId?: string | null } function RunIcon({ status, agentId }: { status: ACPRunStatus; agentId: string }) { @@ -48,15 +59,25 @@ function RunRow({ run, isSelected, onSelect, + dominantModelId, }: { run: ACPRun isSelected: boolean onSelect: () => void + dominantModelId: string | null }) { + // Hide the per-row model badge when this run matches the dominant model + // across the visible list — the badge is redundant noise in that case. + const hideBadge = + dominantModelId !== null && run.modelId !== null && run.modelId === dominantModelId + // The screen-reader label always carries the model name, even when the + // visual badge is suppressed. + const ariaLabel = run.modelName ? `${run.title} · ${run.modelName}` : run.title return ( + {onBulkCloseHidden && ( + + )} + + )} + {/* Project list */}
@@ -253,10 +324,18 @@ export function SessionSidebar({ isActiveProject={activeProjectId === project.id} onSelectRun={onSelectRun} onNewRun={onNewRun} + dominantModelId={dominantModelId} /> ))}
+ + { + if (!open) setConfirm(null) + }} + /> ) } diff --git a/apps/gateway-admin/components/floating-chat-shell.tsx b/apps/gateway-admin/components/floating-chat-shell.tsx index b6031a6b1..2fcfc4870 100644 --- a/apps/gateway-admin/components/floating-chat-shell.tsx +++ b/apps/gateway-admin/components/floating-chat-shell.tsx @@ -44,9 +44,28 @@ export function FloatingChatShell({ config, }: FloatingChatShellProps) { // ---- Context consumers ---- - const { runs, selectedRun, selectedRunId, providerHealth, selectedAgent, agents, projects, pageContext } = - useChatSessionData() - const { createSession, selectRun, sendPrompt: sendPromptAction, selectAgent } = useChatSessionActions() + const { + visibleRuns, + hiddenRunCount, + includeHiddenRuns, + dominantModelId, + lastDispatchError, + selectedRun, + selectedRunId, + providerHealth, + selectedAgent, + agents, + projects, + pageContext, + } = useChatSessionData() + const { + createSession, + selectRun, + sendPrompt: sendPromptAction, + selectAgent, + setIncludeHiddenRuns, + bulkCloseHiddenSessions, + } = useChatSessionActions() const { connectionState } = useChatSessionConnection() const { messages } = useChatSessionStream() @@ -55,7 +74,11 @@ export function FloatingChatShell({ const [lastActionError, setLastActionError] = React.useState(null) const providerReady = Boolean(providerHealth?.ready) - const visibleError = lastActionError ?? (!providerReady ? providerHealth?.message : null) + // Surface order: explicit action error → transient dispatch error → provider-down banner. + const visibleError = + lastActionError ?? + lastDispatchError?.message ?? + (!providerReady ? providerHealth?.message : null) // ---- sendPrompt (reads pageContext from provider + config) ---- const sendPrompt = React.useCallback( @@ -188,11 +211,16 @@ export function FloatingChatShell({ selectRun(runId)} onNewRun={() => void createRun()} + hiddenRunCount={hiddenRunCount} + includeHiddenRuns={includeHiddenRuns} + onToggleIncludeHidden={() => setIncludeHiddenRuns((v) => !v)} + onBulkCloseHidden={bulkCloseHiddenSessions} + dominantModelId={dominantModelId} /> )} diff --git a/apps/gateway-admin/lib/chat/acp-normalizers.test.ts b/apps/gateway-admin/lib/chat/acp-normalizers.test.ts index f42dfa5f0..689eff529 100644 --- a/apps/gateway-admin/lib/chat/acp-normalizers.test.ts +++ b/apps/gateway-admin/lib/chat/acp-normalizers.test.ts @@ -1,7 +1,7 @@ import test from 'node:test' import assert from 'node:assert/strict' -import { sameProviderList } from './acp-normalizers' +import { deriveSessionTitle, sameProviderList, toRun, type RawSessionSummary } from './acp-normalizers' import type { ProviderHealth } from '@/lib/acp/types' function provider(modelName: string): ProviderHealth { @@ -28,3 +28,52 @@ test('sameProviderList detects model metadata changes for stable model ids', () assert.equal(sameProviderList([provider('GPT 5.4')], [provider('GPT 5.4')]), true) assert.equal(sameProviderList([provider('GPT 5.4')], [provider('GPT 5.4 refreshed')]), false) }) + +function rawSession(overrides: Partial = {}): RawSessionSummary { + return { + id: 'sess-1', + provider: 'codex-acp', + title: 'Real conversation title', + cwd: '/tmp', + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + modelId: 'gpt-5.5/medium', + ...overrides, + } +} + +test('deriveSessionTitle keeps real titles unchanged', () => { + assert.equal(deriveSessionTitle(rawSession()), 'Real conversation title') +}) + +test('deriveSessionTitle replaces "New session" placeholder with provider/model/time label', () => { + const derived = deriveSessionTitle(rawSession({ title: 'New session' })) + assert.notEqual(derived, 'New session') + // Contains provider name, short model id, and a time-ago marker. + assert.match(derived, /codex-acp/) + assert.match(derived, /gpt-5\.5/) + assert.match(derived, /now|ago/) +}) + +test('deriveSessionTitle replaces "action route session" placeholder', () => { + const derived = deriveSessionTitle(rawSession({ title: 'action route session', modelId: null })) + assert.notEqual(derived, 'action route session') + assert.match(derived, /codex-acp/) +}) + +test('deriveSessionTitle replaces empty title', () => { + const derived = deriveSessionTitle(rawSession({ title: '' })) + assert.notEqual(derived, '') +}) + +test('deriveSessionTitle drops short model id when modelId missing', () => { + const derived = deriveSessionTitle(rawSession({ title: 'New session', modelId: null, model_id: null })) + assert.doesNotMatch(derived, /\/$/) + assert.match(derived, /codex-acp/) +}) + +test('toRun applies derived title for placeholder sessions', () => { + const run = toRun(rawSession({ title: 'New session' })) + assert.notEqual(run.title, 'New session') + assert.match(run.title, /codex-acp/) +}) diff --git a/apps/gateway-admin/lib/chat/acp-normalizers.ts b/apps/gateway-admin/lib/chat/acp-normalizers.ts index 8c963dac4..e05f70e13 100644 --- a/apps/gateway-admin/lib/chat/acp-normalizers.ts +++ b/apps/gateway-admin/lib/chat/acp-normalizers.ts @@ -64,11 +64,41 @@ export type RawSessionSummary = { // ---- Normalization helpers ---- +const PLACEHOLDER_TITLES = new Set(['New session', 'action route session', '']) + +function shortModelId(id: string | null | undefined): string | null { + if (!id) return null + // Strip the codex effort suffix: "gpt-5.5/medium" → "gpt-5.5". + return id.split('/')[0] ?? id +} + +function timeAgo(iso: string | null | undefined): string | null { + if (!iso) return null + const date = new Date(iso) + if (Number.isNaN(date.getTime())) return null + const seconds = Math.floor((Date.now() - date.getTime()) / 1000) + if (seconds < 60) return 'now' + if (seconds < 3600) return `${Math.floor(seconds / 60)}m ago` + if (seconds < 86400) return `${Math.floor(seconds / 3600)}h ago` + return `${Math.floor(seconds / 86400)}d ago` +} + +export function deriveSessionTitle(session: RawSessionSummary): string { + if (!PLACEHOLDER_TITLES.has(session.title ?? '')) return session.title + const parts = [ + session.provider, + shortModelId(session.modelId ?? session.model_id ?? null), + timeAgo(session.createdAt ?? session.created_at ?? null), + ].filter((part): part is string => Boolean(part)) + if (parts.length === 0) return session.title || 'session' + return parts.join(' · ') +} + export function normalizeSessionSummary(session: RawSessionSummary): BridgeSessionSummary { return { id: session.id, provider: session.provider, - title: session.title, + title: deriveSessionTitle(session), cwd: session.cwd, status: (session.status ?? session.state ?? 'idle') as BridgeSessionSummary['status'], createdAt: session.createdAt ?? session.created_at ?? '', diff --git a/apps/gateway-admin/lib/chat/chat-session-provider.tsx b/apps/gateway-admin/lib/chat/chat-session-provider.tsx index 7c0bfcb45..8718872d5 100644 --- a/apps/gateway-admin/lib/chat/chat-session-provider.tsx +++ b/apps/gateway-admin/lib/chat/chat-session-provider.tsx @@ -44,6 +44,8 @@ import { errorMessageFromPayload, sameProviderList, } from './acp-normalizers' +import { filterVisibleRuns } from './session-filters' +import { dominantModelId as computeDominantModelId } from './dominant-model' import type { ACPAgent, ACPRun, ACPProject, ACPMessage, ACPModelOption } from '@/components/chat/types' import type { BridgeSessionSummary, ProviderHealth, BridgeEvent } from '@/lib/acp/types' import type { SessionEventConnectionState } from './use-session-events' @@ -75,6 +77,11 @@ export type PageContext = { export type ChatSessionDataContextValue = { runs: ACPRun[] + visibleRuns: ACPRun[] + hiddenRunCount: number + includeHiddenRuns: boolean + dominantModelId: string | null + lastDispatchError: { provider: string; message: string; at: number } | null selectedRunId: string | null selectedRun: ACPRun | null providerHealth: ProviderHealth | null @@ -100,6 +107,8 @@ export type ChatSessionActionsContextValue = { selectAgent: (providerId: string) => void selectModel: (providerId: string, modelId: string) => void setPageContext: (ctx: PageContext) => void + setIncludeHiddenRuns: (include: boolean | ((prev: boolean) => boolean)) => void + bulkCloseHiddenSessions: () => Promise<{ closedCount: number; failedCount: number }> } export type ChatSessionConnectionContextValue = { @@ -188,6 +197,15 @@ export function ChatSessionProvider({ const [selectedModelByProvider, setSelectedModelByProvider] = React.useState>({}) const [optimisticMessages, setOptimisticMessages] = React.useState([]) const [pageContext, setPageContext] = React.useState(null) + const [includeHiddenRuns, setIncludeHiddenRuns] = React.useState(false) + // Transient dispatch error surface that does NOT mutate `providerHealth`. + // Earlier code path stomped synthetic `{ ready: false, ... }` into + // `providerHealth` whenever a session or prompt call failed, which made the + // provider look permanently down even when only one dispatch had failed. + // Auto-clears when the user switches providers (see effect below). + const [lastDispatchError, setLastDispatchError] = React.useState< + { provider: string; message: string; at: number } | null + >(null) // SSE state const [events, setEvents] = React.useState([]) @@ -362,13 +380,11 @@ export function ChatSessionProvider({ payload as ErrorPayload | null, 'Failed to create ACP session.', ) - setProviderHealth((current) => ({ - provider: current?.provider ?? 'codex-acp', - ready: false, - command: current?.command ?? '', - args: current?.args ?? [], + setLastDispatchError({ + provider: providerHealth?.provider ?? selectedProviderId ?? 'codex-acp', message, - })) + at: Date.now(), + }) throw new Error(message) } // Narrow: if payload has an `id` field it is a SessionCreatePayload; otherwise ErrorPayload @@ -453,13 +469,11 @@ export function ChatSessionProvider({ }) } catch (error) { const message = error instanceof Error ? error.message : 'Failed to send prompt to ACP session.' - setProviderHealth((current) => ({ - provider: current?.provider ?? selectedProviderId ?? 'codex-acp', - ready: false, - command: current?.command ?? '', - args: current?.args ?? [], + setLastDispatchError({ + provider: providerHealth?.provider ?? selectedProviderId ?? 'codex-acp', message, - })) + at: Date.now(), + }) throw error } }, @@ -467,6 +481,7 @@ export function ChatSessionProvider({ createSession, fetchAcp, isMobileViewport, + providerHealth?.provider, refreshSessions, selectedProviderId, selectedAgent.id, @@ -512,6 +527,16 @@ export function ChatSessionProvider({ } }, [providers, selectedProviderId]) + // Clear lastDispatchError when the user moves to a different provider — the + // error is scoped to whichever provider failed; following them around is + // misleading. + React.useEffect(() => { + if (!lastDispatchError) return + if (lastDispatchError.provider !== selectedProviderId) { + setLastDispatchError(null) + } + }, [selectedProviderId, lastDispatchError]) + // Auto-bootstrap first session — after sessions have been loaded so we don't race with refreshSessions. // Gated on `autoBootstrap`: the provider mounts on every admin page, but we only want to mint a // session when the user has actually surfaced the chat (popover open or /chat route). Otherwise @@ -645,9 +670,65 @@ export function ChatSessionProvider({ // ---- Context values ---- + const visibleRuns = React.useMemo( + () => filterVisibleRuns(runs, { includeHidden: includeHiddenRuns }), + [runs, includeHiddenRuns], + ) + const hiddenRunCount = runs.length - visibleRuns.length + const dominantModelId = React.useMemo(() => computeDominantModelId(visibleRuns), [visibleRuns]) + + const bulkCloseHiddenSessions = React.useCallback(async () => { + // The API auto-injects `confirm: true` for destructive actions and + // `principal` from the auth context, so the client only sends the selector. + const response = await fetchAcp('', { + method: 'POST', + body: JSON.stringify({ + action: 'session.bulk_close', + params: { + // Default sweep selector per lab-de6yc.1: failed/closed older than 7 days. + selector: { + states: ['failed', 'closed'], + max_age_days: 7, + max_count: 500, + }, + }, + }), + }) + if (!response.ok) { + const errorPayload = await readJsonSafe(response) + throw new Error(errorMessageFromPayload(errorPayload, 'Failed to clean up sessions')) + } + const payload = (await readJsonSafe(response)) as + | { closed?: unknown; failed?: unknown } + | null + if ( + !payload || + !Array.isArray(payload.closed) || + !Array.isArray(payload.failed) || + payload.closed.some((id) => typeof id !== 'string') || + payload.failed.some( + (item) => + typeof item !== 'object' || + item == null || + typeof (item as { id?: unknown }).id !== 'string', + ) + ) { + throw new Error('Invalid cleanup response payload') + } + const closedCount = payload.closed.length + const failedCount = payload.failed.length + await refreshSessions() + return { closedCount, failedCount } + }, [fetchAcp, refreshSessions]) + const dataValue = React.useMemo( () => ({ runs, + visibleRuns, + hiddenRunCount, + includeHiddenRuns, + dominantModelId, + lastDispatchError, selectedRunId, selectedRun, providerHealth, @@ -660,7 +741,7 @@ export function ChatSessionProvider({ sessionsLoaded, pageContext, }), - [runs, selectedRunId, selectedRun, providerHealth, providers, selectedProviderId, selectedAgent, selectedModel, agents, projects, sessionsLoaded, pageContext], + [runs, visibleRuns, hiddenRunCount, includeHiddenRuns, dominantModelId, lastDispatchError, selectedRunId, selectedRun, providerHealth, providers, selectedProviderId, selectedAgent, selectedModel, agents, projects, sessionsLoaded, pageContext], ) const actionsValue = React.useMemo( @@ -673,8 +754,10 @@ export function ChatSessionProvider({ selectAgent, selectModel, setPageContext: setPageContextStable, + setIncludeHiddenRuns, + bulkCloseHiddenSessions, }), - [createSession, selectRun, sendPrompt, refreshSessions, refreshProvider, selectAgent, selectModel, setPageContextStable], + [createSession, selectRun, sendPrompt, refreshSessions, refreshProvider, selectAgent, selectModel, setPageContextStable, bulkCloseHiddenSessions], ) const connectionValue = React.useMemo( diff --git a/apps/gateway-admin/lib/chat/dominant-model.test.ts b/apps/gateway-admin/lib/chat/dominant-model.test.ts new file mode 100644 index 000000000..6ffa9e9e4 --- /dev/null +++ b/apps/gateway-admin/lib/chat/dominant-model.test.ts @@ -0,0 +1,42 @@ +import test from 'node:test' +import assert from 'node:assert/strict' + +import { dominantModelId } from './dominant-model' +import type { ACPRun } from '@/components/chat/types' + +function run(modelId: string | null): ACPRun { + return { modelId } as ACPRun +} + +test('empty list returns null', () => { + assert.equal(dominantModelId([]), null) +}) + +test('single run returns its modelId', () => { + assert.equal(dominantModelId([run('gpt-5.5')]), 'gpt-5.5') +}) + +test('strict majority returns the majority id', () => { + assert.equal( + dominantModelId([run('gpt-5.5'), run('gpt-5.5'), run('gpt-5.5'), run('claude')]), + 'gpt-5.5', + ) +}) + +test('exact 50/50 returns null', () => { + assert.equal(dominantModelId([run('a'), run('a'), run('b'), run('b')]), null) +}) + +test('all distinct returns null', () => { + assert.equal(dominantModelId([run('a'), run('b'), run('c')]), null) +}) + +test('null modelIds count in the denominator', () => { + // 2/3 is strict majority for 'gpt-5.5' since floor(3/2)+1 = 2. + assert.equal(dominantModelId([run('gpt-5.5'), run('gpt-5.5'), run(null)]), 'gpt-5.5') +}) + +test('null dominant returns null (no badge string)', () => { + // null is the majority — treat as "no dominant" so badges still render. + assert.equal(dominantModelId([run(null), run(null), run(null), run('gpt-5.5')]), null) +}) diff --git a/apps/gateway-admin/lib/chat/dominant-model.ts b/apps/gateway-admin/lib/chat/dominant-model.ts new file mode 100644 index 000000000..322ae274f --- /dev/null +++ b/apps/gateway-admin/lib/chat/dominant-model.ts @@ -0,0 +1,26 @@ +import type { ACPRun } from '@/components/chat/types' + +/** + * Returns the modelId that exceeds strict majority (> floor(n/2)) across the + * given runs, or null when no such modelId exists, the list is empty, or the + * dominant value is null. + * + * Single-run lists return that run's modelId — consumers should still render + * the badge for a lone row since semantic dominance is undefined at N=1. + */ +export function dominantModelId(runs: ACPRun[]): string | null { + if (runs.length === 0) return null + if (runs.length === 1) return runs[0]?.modelId ?? null + + const counts = new Map() + for (const run of runs) { + const key = run.modelId ?? null + counts.set(key, (counts.get(key) ?? 0) + 1) + } + + const threshold = Math.floor(runs.length / 2) + 1 + for (const [id, count] of counts) { + if (count >= threshold) return id + } + return null +} diff --git a/apps/gateway-admin/lib/chat/model-grouping.test.ts b/apps/gateway-admin/lib/chat/model-grouping.test.ts new file mode 100644 index 000000000..d5f577a8e --- /dev/null +++ b/apps/gateway-admin/lib/chat/model-grouping.test.ts @@ -0,0 +1,62 @@ +import test from 'node:test' +import assert from 'node:assert/strict' + +import { groupModels, parseModelId } from './model-grouping' +import type { ACPModelOption } from '@/components/chat/types' + +function option(id: string, name?: string): ACPModelOption { + return { id, name: name ?? id } +} + +test('parseModelId handles slash separator', () => { + assert.deepEqual(parseModelId('gpt-5.5/medium'), { base: 'gpt-5.5', effort: 'medium' }) +}) + +test('parseModelId handles paren separator after normalization', () => { + assert.deepEqual(parseModelId('GPT-5.5 (medium)'), { base: 'GPT-5.5', effort: 'medium' }) +}) + +test('parseModelId returns null for non-effort suffixes', () => { + assert.equal(parseModelId('gpt-5.4-mini'), null) + assert.equal(parseModelId('gpt-5.3-codex-spark'), null) + assert.equal(parseModelId('Default (recommended)'), null) + assert.equal(parseModelId('Auto (Gemini 3)'), null) +}) + +test('parseModelId rejects names containing slash that are not effort suffixes', () => { + assert.equal(parseModelId('OpenCode Zen/Big Pickle'), null) +}) + +test('groupModels returns grouped result for codex-style list', () => { + const opts = [ + option('gpt-5.5/low', 'GPT-5.5 (low)'), + option('gpt-5.5/medium', 'GPT-5.5 (medium)'), + option('gpt-5.5/high', 'GPT-5.5 (high)'), + option('gpt-5.5/xhigh', 'GPT-5.5 (xhigh)'), + option('gpt-5.4/low', 'GPT-5.4 (low)'), + ] + const result = groupModels(opts) + assert.equal(result.kind, 'grouped') + if (result.kind !== 'grouped') return + assert.equal(result.groups.length, 2) + assert.equal(result.groups[0].base, 'gpt-5.5') + assert.equal(result.groups[0].variants.length, 4) + assert.deepEqual( + result.groups[0].variants.map((v) => v.effort), + ['low', 'medium', 'high', 'xhigh'], + ) +}) + +test('groupModels returns flat for the claude default option', () => { + assert.equal(groupModels([option('default', 'Default (recommended)')]).kind, 'flat') +}) + +test('groupModels returns flat if any option fails to parse', () => { + const opts = [option('gpt-5.5/medium', 'GPT-5.5 (medium)'), option('gpt-5.4-mini', 'GPT-5.4-Mini')] + assert.equal(groupModels(opts).kind, 'flat') +}) + +test('groupModels returns flat for empty and single-option lists', () => { + assert.equal(groupModels([]).kind, 'flat') + assert.equal(groupModels([option('x/medium', 'X')]).kind, 'flat') +}) diff --git a/apps/gateway-admin/lib/chat/model-grouping.ts b/apps/gateway-admin/lib/chat/model-grouping.ts new file mode 100644 index 000000000..da7a6df9a --- /dev/null +++ b/apps/gateway-admin/lib/chat/model-grouping.ts @@ -0,0 +1,49 @@ +import type { ACPModelOption } from '@/components/chat/types' + +export type Effort = 'low' | 'medium' | 'high' | 'xhigh' + +const EFFORT_ORDER: readonly Effort[] = ['low', 'medium', 'high', 'xhigh'] + +export function parseModelId(id: string): { base: string; effort: Effort } | null { + const normalized = id.replace(/\s*\(([^)]+)\)\s*$/, ' $1') + const match = /^(.+?)[\s/]\s*(low|medium|high|xhigh)$/i.exec(normalized) + if (!match) return null + const base = match[1].trim() + const effort = match[2].toLowerCase() as Effort + if (base.includes('/')) return null + return { base, effort } +} + +export interface GroupedOption { + base: string + variants: Array<{ effort: Effort; option: ACPModelOption }> +} + +export type GroupingResult = + | { kind: 'flat'; options: ACPModelOption[] } + | { kind: 'grouped'; groups: GroupedOption[] } + +export function groupModels(options: ACPModelOption[]): GroupingResult { + if (options.length <= 1) return { kind: 'flat', options } + + const parsed = options.map((option) => ({ option, parsed: parseModelId(option.id) })) + if (parsed.some((p) => p.parsed === null)) { + return { kind: 'flat', options } + } + + const groupMap = new Map() + for (const { option, parsed: p } of parsed) { + if (!p) continue + const existing = groupMap.get(p.base) ?? { base: p.base, variants: [] } + existing.variants.push({ effort: p.effort, option }) + groupMap.set(p.base, existing) + } + + for (const group of groupMap.values()) { + group.variants.sort( + (a, b) => EFFORT_ORDER.indexOf(a.effort) - EFFORT_ORDER.indexOf(b.effort), + ) + } + + return { kind: 'grouped', groups: Array.from(groupMap.values()) } +} diff --git a/apps/gateway-admin/lib/chat/session-filters.test.ts b/apps/gateway-admin/lib/chat/session-filters.test.ts new file mode 100644 index 000000000..46d328a24 --- /dev/null +++ b/apps/gateway-admin/lib/chat/session-filters.test.ts @@ -0,0 +1,54 @@ +import test from 'node:test' +import assert from 'node:assert/strict' + +import { filterVisibleRuns, isHiddenState } from './session-filters' +import type { ACPRun } from '@/components/chat/types' + +function run(id: string, status: ACPRun['status']): ACPRun { + return { + id, + projectId: 'p', + agentId: 'a', + provider: 'codex-acp', + title: id, + createdAt: new Date(), + updatedAt: new Date(), + status, + providerSessionId: 'psid', + cwd: '.', + } +} + +test('isHiddenState matches failed and closed only', () => { + assert.equal(isHiddenState('failed'), true) + assert.equal(isHiddenState('closed'), true) + assert.equal(isHiddenState('cancelled'), false) + assert.equal(isHiddenState('idle'), false) + assert.equal(isHiddenState('completed'), false) + assert.equal(isHiddenState(undefined), false) +}) + +test('filterVisibleRuns hides failed/closed by default', () => { + const visible = filterVisibleRuns( + [ + run('a', 'idle'), + run('b', 'failed'), + run('c', 'closed'), + run('d', 'completed'), + run('e', 'cancelled'), + ], + { includeHidden: false }, + ) + assert.deepEqual( + visible.map((r) => r.id), + ['a', 'd', 'e'], + ) +}) + +test('filterVisibleRuns passes everything when includeHidden=true', () => { + const visible = filterVisibleRuns( + [run('a', 'idle'), run('b', 'failed')], + { includeHidden: true }, + ) + assert.equal(visible.length, 2) +}) diff --git a/apps/gateway-admin/lib/chat/session-filters.ts b/apps/gateway-admin/lib/chat/session-filters.ts new file mode 100644 index 000000000..bfb472119 --- /dev/null +++ b/apps/gateway-admin/lib/chat/session-filters.ts @@ -0,0 +1,15 @@ +import type { ACPRun, ACPRunStatus } from '@/components/chat/types' + +const HIDDEN_STATES: ReadonlySet = new Set(['failed', 'closed']) + +export function isHiddenState(status: ACPRunStatus | undefined): boolean { + return status !== undefined && HIDDEN_STATES.has(status) +} + +export function filterVisibleRuns( + runs: ACPRun[], + options: { includeHidden: boolean }, +): ACPRun[] { + if (options.includeHidden) return runs + return runs.filter((run) => !isHiddenState(run.status)) +} diff --git a/apps/gateway-admin/lib/chat/use-chat-session-controller.ts b/apps/gateway-admin/lib/chat/use-chat-session-controller.ts index bd4eacea1..2dc28224a 100644 --- a/apps/gateway-admin/lib/chat/use-chat-session-controller.ts +++ b/apps/gateway-admin/lib/chat/use-chat-session-controller.ts @@ -163,6 +163,15 @@ export type SendPromptForSelectedProviderOptions = { includePageContext?: boolean } +type StartAndPromptResult = { + session_id: string + provider_session_id?: string + model_id?: string + provider: string + title: string + stream_ticket?: string +} + export async function sendPromptForSelectedProvider({ payload, selectedRun, @@ -177,6 +186,58 @@ export async function sendPromptForSelectedProvider({ pageContext, includePageContext = false, }: SendPromptForSelectedProviderOptions) { + // When no session is selected, fire one atomic action instead of two + // sequential calls. This collapses the prior orphan-row race: any failure + // here is server-side atomic — either the session exists with its first + // prompt queued, or nothing was created. + if (!selectedRun) { + const optimisticRunId = `pending-${Date.now()}` + const optimisticId = `optimistic-${optimisticRunId}-${Date.now()}` + addOptimisticMessage({ + id: optimisticId, + runId: optimisticRunId, + role: 'user' as const, + text: payload.text, + createdAt: new Date(), + thoughts: [], + toolCalls: [], + version: 0, + }) + + const orchestratorBody = { + action: 'session.start_and_prompt', + params: { + prompt: payload.text, + ...(selectedProviderId && { provider: selectedProviderId }), + ...(selectedModelId && { model: selectedModelId }), + ...(payload.attachments.length > 0 && { attachments: payload.attachments }), + ...(includePageContext && pageContext !== null && pageContext !== undefined && { page_context: pageContext }), + }, + } + + const response = await fetchAcp('', { + method: 'POST', + body: JSON.stringify(orchestratorBody), + }) + if (!response.ok) { + removeOptimisticMessage(optimisticId) + const errorPayload = await readJsonSafe(response) + throw new Error( + errorMessageFromPayload(errorPayload, 'Failed to start ACP session.'), + ) + } + // Drop the optimistic placeholder — refreshSessions() below will surface + // the materialized run with its real id, and the SSE stream will replay + // the user message we just queued server-side. + removeOptimisticMessage(optimisticId) + const _result = (await readJsonSafe(response)) as StartAndPromptResult | null + // The server-side action atomically queued the prompt; surfacing the new + // run via the standard refresh is sufficient. + void _result + await refreshSessions() + return + } + const runId = await ensurePromptRunIdForProvider( selectedRun, selectedProviderId, diff --git a/apps/gateway-admin/lib/chat/use-list-keyboard.test.ts b/apps/gateway-admin/lib/chat/use-list-keyboard.test.ts new file mode 100644 index 000000000..dc2cd02e0 --- /dev/null +++ b/apps/gateway-admin/lib/chat/use-list-keyboard.test.ts @@ -0,0 +1,48 @@ +import test from 'node:test' +import assert from 'node:assert/strict' + +import { nextNavIndex, shouldResetActiveIndex } from './use-list-keyboard' + +test('ArrowDown wraps at end', () => { + assert.equal(nextNavIndex(0, 'ArrowDown', 3), 1) + assert.equal(nextNavIndex(2, 'ArrowDown', 3), 0) +}) + +test('ArrowUp wraps at start', () => { + assert.equal(nextNavIndex(0, 'ArrowUp', 3), 2) + assert.equal(nextNavIndex(1, 'ArrowUp', 3), 0) +}) + +test('Home and End jump to ends', () => { + assert.equal(nextNavIndex(2, 'Home', 5), 0) + assert.equal(nextNavIndex(0, 'End', 5), 4) +}) + +test('returns null for empty list', () => { + assert.equal(nextNavIndex(0, 'ArrowDown', 0), null) + assert.equal(nextNavIndex(0, 'Home', 0), null) +}) + +test('returns null for unrelated keys', () => { + assert.equal(nextNavIndex(0, 'a', 5), null) + assert.equal(nextNavIndex(0, 'Enter', 5), null) + assert.equal(nextNavIndex(0, 'Tab', 5), null) +}) + +test('shouldResetActiveIndex flags shrink-past-active', () => { + // List shrinks below the active index — hook must reset. + assert.equal(shouldResetActiveIndex(3, 2), true) + assert.equal(shouldResetActiveIndex(1, 1), true) +}) + +test('shouldResetActiveIndex leaves valid index alone', () => { + assert.equal(shouldResetActiveIndex(0, 5), false) + assert.equal(shouldResetActiveIndex(4, 5), false) +}) + +test('shouldResetActiveIndex does not reset on empty list', () => { + // Empty list is a transient state; preserve the index so the hook does not + // thrash when the list briefly empties between provider switches. + assert.equal(shouldResetActiveIndex(0, 0), false) + assert.equal(shouldResetActiveIndex(3, 0), false) +}) diff --git a/apps/gateway-admin/lib/chat/use-list-keyboard.ts b/apps/gateway-admin/lib/chat/use-list-keyboard.ts new file mode 100644 index 000000000..9c72a0da5 --- /dev/null +++ b/apps/gateway-admin/lib/chat/use-list-keyboard.ts @@ -0,0 +1,64 @@ +import * as React from 'react' + +const NAV_KEYS = new Set(['ArrowDown', 'ArrowUp', 'Home', 'End']) + +/** + * Pure navigation reducer for keyboard list pickers. Returns the next index + * for a given key press, or null if the key is not a navigation key. Wraps + * at both ends. Returns null when count is 0. + * + * Callers wire this up alongside DOM focus management (e.g., focusing the + * underlying option button) — keeping that side-effect outside the helper + * keeps the helper deterministic and unit-testable. + */ +export function nextNavIndex(current: number, key: string, count: number): number | null { + if (count <= 0 || !NAV_KEYS.has(key)) return null + switch (key) { + case 'ArrowDown': + return (current + 1) % count + case 'ArrowUp': + return (current - 1 + count) % count + case 'Home': + return 0 + case 'End': + return count - 1 + default: + return null + } +} + +export interface ListKeyboardHandle { + activeIndex: number + setActiveIndex: React.Dispatch> +} + +/** + * Pure predicate for the shrink-reset path. Returns true when the active index + * has fallen out of range because the list shrank but is still non-empty. + * An empty list does not trigger a reset — that case is handled separately so + * the hook does not thrash state when transient empty states arrive. + */ +export function shouldResetActiveIndex(activeIndex: number, count: number): boolean { + return count > 0 && activeIndex >= count +} + +/** + * Shared state shape for keyboard-navigable lists. Tracks an active index and + * resets to 0 when `count` shrinks past the current index (e.g., the list + * collapses on a provider switch). Pair with `nextNavIndex` to react to keys. + */ +export function useListKeyboard({ + count, + initialIndex = 0, +}: { + count: number + initialIndex?: number +}): ListKeyboardHandle { + const [activeIndex, setActiveIndex] = React.useState(initialIndex) + + React.useEffect(() => { + if (shouldResetActiveIndex(activeIndex, count)) setActiveIndex(0) + }, [count, activeIndex]) + + return { activeIndex, setActiveIndex } +} diff --git a/crates/lab/Cargo.toml b/crates/lab/Cargo.toml index c9e852222..b39b4cc80 100644 --- a/crates/lab/Cargo.toml +++ b/crates/lab/Cargo.toml @@ -84,7 +84,7 @@ ulid = "1" num_cpus = "1" # optional — deploy feature -regex = { version = "1", optional = true } +regex = { version = "1" } rmcp.workspace = true # Transport-layer body-cap wrapper (dispatch/upstream/http_client.rs) uses @@ -134,7 +134,7 @@ marketplace = ["mcpregistry"] services-all = [ ] default = ["all"] -deploy = ["lab-apis/deploy", "dep:regex"] +deploy = ["lab-apis/deploy"] extract = ["lab-apis/extract"] fs = ["dep:walkdir", "dep:globset", "dep:unicode-normalization", "dep:rustix"] lab-admin = [] diff --git a/crates/lab/src/acp/registry.rs b/crates/lab/src/acp/registry.rs index d98c33723..4ca4b2889 100644 --- a/crates/lab/src/acp/registry.rs +++ b/crates/lab/src/acp/registry.rs @@ -23,6 +23,7 @@ use lab_apis::acp::types::{ AcpEvent, AcpModelOption, AcpProviderHealth, AcpSessionState, AcpSessionSummary, }; +use crate::dispatch::acp::params::BulkCloseSelector; use crate::dispatch::acp::persistence::SqliteAcpPersistence; use crate::dispatch::error::ToolError; @@ -127,6 +128,36 @@ impl Session { // Registry // --------------------------------------------------------------------------- +/// Partial-success envelope for `bulk_close_sessions`. +#[derive(Debug, Clone, serde::Serialize)] +pub struct BulkCloseResult { + pub closed: Vec, + pub failed: Vec, +} + +/// Per-session failure inside a `BulkCloseResult.failed[]` array. +#[derive(Debug, Clone, serde::Serialize)] +pub struct BulkCloseFailure { + pub id: String, + pub kind: String, + pub message: String, +} + +/// Outcome of `start_and_prompt` — the orchestrator that collapses +/// `session.start` + `session.prompt` into a single atomic call. The session +/// is closed before returning if the prompt step fails, so callers never +/// see an orphan row from this code path. +#[derive(Debug, Clone, serde::Serialize)] +pub struct StartAndPromptResult { + pub session_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub provider_session_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub model_id: Option, + pub provider: String, + pub title: String, +} + #[derive(Clone)] pub struct AcpSessionRegistry { sessions: Arc>>>, @@ -987,6 +1018,181 @@ impl AcpSessionRegistry { Ok(()) } + /// Close every session the caller owns that matches the typed selector. + /// Returns a per-id partial-success envelope. Sessions belonging to other + /// principals are silently omitted (matches the not_found masking pattern + /// of `close_session`). + pub async fn bulk_close_sessions( + &self, + selector: BulkCloseSelector, + principal: &str, + ) -> Result { + if principal.trim().is_empty() { + return Err(ToolError::Sdk { + sdk_kind: "auth_failed".to_string(), + message: "authenticated principal required for bulk_close".to_string(), + }); + } + let now = jiff::Timestamp::now(); + let max_age_secs: Option = selector.max_age_days.map(|d| i64::from(d) * 86_400); + + let candidates: Vec = { + let sessions = self.sessions.read().await; + let mut ids = Vec::new(); + for session in sessions.values() { + if session.principal != principal { + continue; + } + let summary = session.summary.read().await; + if !selector.states.is_empty() && !selector.states.contains(&summary.state) { + continue; + } + if let Some(window) = max_age_secs { + let updated_secs = summary + .updated_at + .parse::() + .ok() + .map(|ts| now.as_second() - ts.as_second()) + .unwrap_or(0); + if updated_secs < window { + continue; + } + } + ids.push(session.id.clone()); + } + ids + }; + + if (candidates.len() as u32) > selector.max_count { + return Err(ToolError::InvalidParam { + message: format!( + "selector matches {} sessions; max_count is {}", + candidates.len(), + selector.max_count + ), + param: "selector".to_string(), + }); + } + + let semaphore = Arc::new(tokio::sync::Semaphore::new(5)); + let mut handles = Vec::with_capacity(candidates.len()); + for id in candidates { + let sem = semaphore.clone(); + let registry = self.clone(); + let principal_owned = principal.to_string(); + let id_for_task = id.clone(); + handles.push(( + id, + tokio::spawn(async move { + let _permit = sem.acquire().await.expect("bulk_close semaphore closed"); + registry.close_session(&id_for_task, &principal_owned).await + }), + )); + } + + let mut closed = Vec::new(); + let mut failed = Vec::new(); + for (id, handle) in handles { + match handle.await { + Ok(Ok(())) => closed.push(id), + Ok(Err(error)) => { + // Silently skip sessions reaped or otherwise inaccessible between + // the snapshot and close attempt — preserves not_found masking. + if error.kind() == "not_found" { + continue; + } + failed.push(BulkCloseFailure { + id, + kind: error.kind().to_string(), + message: error.user_message().to_string(), + }); + } + Err(join_error) => { + tracing::error!( + surface = "acp", + service = "registry", + action = "session.bulk_close", + session_id = %id, + error = %join_error, + "bulk_close worker task failed", + ); + failed.push(BulkCloseFailure { + id, + kind: "internal_error".to_string(), + message: "bulk_close worker failed".to_string(), + }); + } + } + } + + tracing::info!( + surface = "acp", service = "registry", action = "session.bulk_close", + principal = %principal, + closed_count = closed.len(), + failed_count = failed.len(), + "ACP bulk_close completed", + ); + + Ok(BulkCloseResult { closed, failed }) + } + + /// Atomically create a session and queue its first prompt. On any prompt- + /// step failure, the session is closed before the error returns — so a + /// failed `start_and_prompt` leaves no orphan row in the sidebar. + pub async fn start_and_prompt( + &self, + input: StartSessionInput, + prompt_text: &str, + prompt_attachments: Vec, + principal: &str, + prompt_options: PromptSessionOptions, + ) -> Result { + let session = self.create_session(input, principal).await?; + + let prompt_result = self + .prompt_session_with_attachments( + &session.id, + prompt_text, + prompt_attachments, + principal, + session.model_id.as_deref(), + prompt_options, + ) + .await; + + if let Err(prompt_err) = prompt_result { + // Atomicity: drop the just-created session before bubbling the + // failure so callers see exactly one of {success, no-op}. + tracing::warn!( + surface = "acp", + service = "registry", + action = "session.start_and_prompt", + session_id = %session.id, + kind = %prompt_err.kind(), + "start_and_prompt prompt step failed — closing session", + ); + if let Err(close_err) = self.close_session(&session.id, principal).await { + tracing::error!( + surface = "acp", + service = "registry", + action = "session.start_and_prompt", + session_id = %session.id, + kind = %close_err.kind(), + "failed to close session after prompt failure", + ); + } + return Err(prompt_err); + } + + Ok(StartAndPromptResult { + session_id: session.id.clone(), + provider_session_id: session.provider_session_id.clone(), + model_id: session.model_id.clone(), + provider: session.provider.clone(), + title: session.title.clone(), + }) + } + /// Gracefully terminate all sessions. Sets shutting_down flag, cancels every /// runtime, waits ≤10 s, then force-clears the map. #[allow(dead_code)] @@ -1639,6 +1845,27 @@ impl AcpSessionRegistry { summary } + /// Force the cached summary state of a session for tests. Both the + /// fast-path state lock and the summary lock are updated so any consumer + /// that reads either sees the new value. + #[cfg(test)] + pub async fn force_summary_state_for_tests( + &self, + session_id: &str, + state: AcpSessionState, + ) { + if let Ok(session) = self.get_session_arc(session_id).await { + *session.state.write().await = state.clone(); + session.summary.write().await.state = state; + } + } + + /// Check whether a session id is still registered (for assertions). + #[cfg(test)] + pub async fn session_exists_for_tests(&self, session_id: &str) -> bool { + self.sessions.read().await.contains_key(session_id) + } + /// Inject a pre-built session whose runtime command queue is already full. #[cfg(test)] pub async fn inject_saturated_fake_session( diff --git a/crates/lab/src/acp/runtime.rs b/crates/lab/src/acp/runtime.rs index 28fe008aa..1e3f59bec 100644 --- a/crates/lab/src/acp/runtime.rs +++ b/crates/lab/src/acp/runtime.rs @@ -1252,6 +1252,9 @@ fn redact_provider_stderr_line(line: &str) -> (String, bool) { .map(redact_stdio_value) .collect::>() .join(" "); + // Layer broader sanitization (IPs, JWTs, home paths) over the per-token + // key=value redaction. + let redacted = sanitize_provider_error(&redacted); if redacted.chars().count() <= MAX_PROVIDER_STDERR_CHARS { return (redacted, false); } @@ -1263,6 +1266,41 @@ fn redact_provider_stderr_line(line: &str) -> (String, bool) { (truncated, true) } +/// Strip identifying network endpoints, JWT-shaped tokens, and absolute home +/// paths from provider error/stderr text before surfacing it to clients. +/// Composes with [`redact_stdio_value`] which already handles `key=value` +/// secret patterns at the token level. +pub fn sanitize_provider_error(message: &str) -> String { + use std::sync::OnceLock; + static PATTERNS: OnceLock> = OnceLock::new(); + let patterns = PATTERNS.get_or_init(|| { + vec![ + // IPv4 with optional :port. Conservative — does not match IPv6. + ( + regex::Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}(?::\d+)?\b").unwrap(), + "[redacted-ip]", + ), + // JWT-shaped token: any whitespace-delimited word starting with the + // standard `eyJ` header prefix. + ( + regex::Regex::new(r"\beyJ[A-Za-z0-9_.-]+\b").unwrap(), + "[redacted-jwt]", + ), + // Absolute paths under per-user roots — collapses usernames and + // working-directory layouts that would otherwise leak through. + ( + regex::Regex::new(r#"/(?:home|Users|root)/[^ \t\n"']+"#).unwrap(), + "[path]", + ), + ] + }); + let mut out = message.to_string(); + for (re, repl) in patterns.iter() { + out = re.replace_all(&out, *repl).to_string(); + } + out +} + async fn run_codex_session( session_id: String, input: StartSessionInput, @@ -3174,6 +3212,36 @@ mod tests { ); } + #[test] + fn sanitize_provider_error_strips_ip_jwt_and_home_paths() { + let raw = "failed to auth to 10.0.0.5:8000 with eyJabc.def.ghi at /home/user/.lab/creds"; + let clean = super::sanitize_provider_error(raw); + assert!(!clean.contains("10.0.0.5"), "IP should be redacted: {clean}"); + assert!(!clean.contains("eyJabc"), "JWT should be redacted: {clean}"); + assert!( + !clean.contains("/home/user"), + "home path should be redacted: {clean}" + ); + assert!(clean.contains("[redacted-ip]")); + assert!(clean.contains("[redacted-jwt]")); + assert!(clean.contains("[path]")); + } + + #[test] + fn sanitize_provider_error_passes_known_safe_messages_unchanged() { + let raw = "model not found: gpt-5.1"; + assert_eq!(super::sanitize_provider_error(raw), raw); + } + + #[test] + fn redact_provider_stderr_line_strips_ip_and_path() { + let (line, truncated) = + redact_provider_stderr_line("connect failed at 192.168.1.100:443 in /home/jmagar/workspace"); + assert!(!truncated); + assert!(!line.contains("192.168.1.100")); + assert!(!line.contains("/home/jmagar")); + } + #[test] fn redact_provider_stderr_line_masks_secrets_and_limits_length() { let (line, truncated) = diff --git a/crates/lab/src/dispatch/acp/catalog.rs b/crates/lab/src/dispatch/acp/catalog.rs index a43f66f18..a3d62a0b7 100644 --- a/crates/lab/src/dispatch/acp/catalog.rs +++ b/crates/lab/src/dispatch/acp/catalog.rs @@ -1,7 +1,7 @@ //! Action catalog for the `acp` (Agent Client Protocol) service. //! //! Single authoritative source for MCP, CLI, and API adapters. -//! `session.cancel` and `session.close` are marked destructive. +//! `session.cancel`, `session.close`, and `session.bulk_close` are marked destructive. use lab_apis::core::action::{ActionSpec, ParamSpec}; @@ -112,6 +112,56 @@ pub const ACTIONS: &[ActionSpec] = &[ }, ], }, + ActionSpec { + name: "session.start_and_prompt", + description: "Atomically create an ACP session and queue its first prompt. Returns session metadata + SSE stream ticket. Closes the orphan-session window of separate create+prompt calls.", + destructive: false, + returns: "Value", + params: &[ + ParamSpec { + name: "provider", + ty: "string", + required: false, + description: "Provider id (defaults to gateway default)", + }, + ParamSpec { + name: "model", + ty: "string", + required: false, + description: "Model id; provider's default if omitted", + }, + ParamSpec { + name: "title", + ty: "string", + required: false, + description: "Human-readable session title", + }, + ParamSpec { + name: "cwd", + ty: "string", + required: false, + description: "Working directory for the session", + }, + ParamSpec { + name: "prompt", + ty: "string", + required: true, + description: "First user prompt text", + }, + ParamSpec { + name: "page_context", + ty: "object", + required: false, + description: "Optional page context: {route, entityType?, entityId?}", + }, + ParamSpec { + name: "principal", + ty: "string", + required: true, + description: "Caller principal for ownership of the new session", + }, + ], + }, ActionSpec { name: "session.prompt", description: "Send a prompt to a session. Optional provider switches the active runtime inside the same Lab session before dispatch.", @@ -260,6 +310,27 @@ pub const ACTIONS: &[ActionSpec] = &[ }, ], }, + ActionSpec { + name: "session.bulk_close", + description: "Bulk close sessions matching a typed selector. Self-service only — \ + only the caller's own sessions are touched. [destructive]", + destructive: true, + returns: r#"{ "closed": string[], "failed": [{ "id": string, "kind": string, "message": string }] }"#, + params: &[ + ParamSpec { + name: "selector", + ty: "object", + required: true, + description: "BulkCloseSelector { states?: AcpSessionState[], max_age_days?: number, max_count?: number (default 500) }", + }, + ParamSpec { + name: "principal", + ty: "string", + required: true, + description: "Caller principal; only the caller's sessions are touched", + }, + ], + }, ActionSpec { name: "session.events", description: "Get stored events for a session. ProviderInfo events of type \ diff --git a/crates/lab/src/dispatch/acp/dispatch.rs b/crates/lab/src/dispatch/acp/dispatch.rs index cedc73907..3ecf7d3fe 100644 --- a/crates/lab/src/dispatch/acp/dispatch.rs +++ b/crates/lab/src/dispatch/acp/dispatch.rs @@ -15,7 +15,8 @@ use super::catalog::ACTIONS; use super::client::require_registry; use super::page_context::{PageContextInput, build_prompt_with_context}; use super::params::{ - LocalPromptAttachment, opt_str, opt_u64, require_str, validate_local_attachments, + BulkCloseSelector, LocalPromptAttachment, opt_str, opt_u64, require_str, + validate_local_attachments, }; /// SSE ticket lifetime in seconds. @@ -115,7 +116,7 @@ pub async fn dispatch_with_registry( action: &str, params: Value, ) -> Result { - let max_params_bytes = if action == "session.prompt" { + let max_params_bytes = if action == "session.prompt" || action == "session.start_and_prompt" { MAX_ACP_PROMPT_PARAMS_BYTES } else { MAX_ACP_PARAMS_BYTES @@ -243,6 +244,82 @@ pub async fn dispatch_with_registry( to_json(summary) } + "session.start_and_prompt" => { + let principal = require_str(¶ms, "principal")?; + let provider = opt_str(¶ms, "provider").map(|s| s.to_string()); + let title = opt_str(¶ms, "title").map(|s| s.to_string()); + let cwd = opt_str(¶ms, "cwd").unwrap_or("").to_string(); + let model_id = opt_str(¶ms, "model") + .or_else(|| opt_str(¶ms, "model_id")) + .map(str::to_string); + let raw_text = params.get("prompt").and_then(|v| v.as_str()).unwrap_or(""); + if raw_text.is_empty() { + return Err(ToolError::MissingParam { + message: "prompt is required".to_string(), + param: "prompt".to_string(), + }); + } + ensure_prompt_size(raw_text)?; + + let page_ctx = params + .get("page_context") + .and_then(|v| v.as_object()) + .map(|obj| PageContextInput { + route: obj.get("route").and_then(|v| v.as_str()).unwrap_or(""), + entity_type: obj.get("entityType").and_then(|v| v.as_str()), + entity_id: obj.get("entityId").and_then(|v| v.as_str()), + }); + let attachments: Vec = params + .get("attachments") + .cloned() + .map(serde_json::from_value) + .transpose() + .map_err(|error| ToolError::InvalidParam { + message: format!("invalid attachments payload: {error}"), + param: "attachments".into(), + })? + .unwrap_or_default(); + validate_local_attachments(&attachments)?; + + let input = StartSessionInput { + provider: provider.clone(), + title, + cwd, + principal: Some(principal.to_string()), + model_id, + }; + // Synthesize the effective prompt with page-context prefix, just + // like `session.prompt` does. + let placeholder_session_id = ""; // session does not exist yet; helper tolerates empty. + let effective_text = build_prompt_with_context(placeholder_session_id, raw_text, page_ctx.as_ref()); + ensure_prompt_size(&effective_text)?; + + let result = registry + .start_and_prompt( + input, + &effective_text, + attachments, + principal, + PromptSessionOptions { + provider, + continuity_mode: opt_str(¶ms, "continuity_mode").map(ToOwned::to_owned), + }, + ) + .await?; + // Issue the SSE ticket so the client can subscribe to the stream + // without a second action call. + let ticket = issue_subscribe_ticket(&result.session_id, principal)?; + let mut value = to_json(json!(result))?; + if let Some(map) = value.as_object_mut() { + map.insert("stream_ticket".to_string(), Value::String(ticket)); + map.insert( + "ticket_expires_in_secs".to_string(), + Value::Number(TICKET_TTL_SECS.into()), + ); + } + Ok(value) + } + "session.prompt" => { let session_id = require_str(¶ms, "session_id")?; let principal = opt_str(¶ms, "principal").unwrap_or(""); @@ -342,6 +419,27 @@ pub async fn dispatch_with_registry( to_json(json!({ "ok": true, "session_id": session_id })) } + "session.bulk_close" => { + require_confirm(¶ms, "session.bulk_close")?; + let selector_value = params + .get("selector") + .cloned() + .ok_or_else(|| ToolError::MissingParam { + message: "selector is required".to_string(), + param: "selector".to_string(), + })?; + let selector: BulkCloseSelector = serde_json::from_value(selector_value).map_err( + |error| ToolError::InvalidParam { + message: format!("invalid selector: {error}"), + param: "selector".to_string(), + }, + )?; + selector.validate_non_empty()?; + let principal = require_str(¶ms, "principal")?; + let result = registry.bulk_close_sessions(selector, principal).await?; + to_json(json!(result)) + } + "session.subscribe_ticket" => { let session_id = require_str(¶ms, "session_id")?; let principal = opt_str(¶ms, "principal").unwrap_or(""); @@ -470,6 +568,7 @@ mod tests { use std::time::Duration; use crate::acp::registry::AcpSessionRegistry; + use lab_apis::acp::types::AcpSessionState; use serde_json::json; #[tokio::test] @@ -592,4 +691,138 @@ mod tests { assert_eq!(session_id, "sess-456"); assert_eq!(principal, ""); } + + #[tokio::test] + async fn session_start_and_prompt_requires_principal() { + let registry = AcpSessionRegistry::new_for_tests(Duration::from_millis(100)); + let err = dispatch_with_registry( + ®istry, + "session.start_and_prompt", + json!({ + "prompt": "hello", + }), + ) + .await + .expect_err("principal must be required"); + assert_eq!(err.kind(), "missing_param"); + } + + #[tokio::test] + async fn session_start_and_prompt_requires_prompt_text() { + let registry = AcpSessionRegistry::new_for_tests(Duration::from_millis(100)); + let err = dispatch_with_registry( + ®istry, + "session.start_and_prompt", + json!({ + "principal": "alice", + }), + ) + .await + .expect_err("prompt must be required"); + assert_eq!(err.kind(), "missing_param"); + } + + #[tokio::test] + async fn session_start_and_prompt_rejects_oversized_prompt() { + let registry = AcpSessionRegistry::new_for_tests(Duration::from_millis(100)); + let huge = "x".repeat(MAX_ACP_PROMPT_BYTES + 1); + let err = dispatch_with_registry( + ®istry, + "session.start_and_prompt", + json!({ + "principal": "alice", + "prompt": huge, + }), + ) + .await + .expect_err("oversized prompt must be rejected"); + assert_eq!(err.kind(), "content_too_large"); + } + + #[tokio::test] + async fn session_start_and_prompt_appears_in_schema() { + let v = dispatch("schema", json!({"action": "session.start_and_prompt"})) + .await + .unwrap(); + assert!(v.is_object()); + // Schema entry must surface the canonical params so clients can wire UIs. + let params = v["params"].as_array().expect("params array"); + let names: Vec<&str> = params + .iter() + .filter_map(|p| p["name"].as_str()) + .collect(); + assert!(names.contains(&"prompt"), "schema missing prompt: {names:?}"); + assert!(names.contains(&"principal"), "schema missing principal: {names:?}"); + } + + #[tokio::test] + async fn session_bulk_close_rejects_empty_selector() { + let registry = AcpSessionRegistry::new_for_tests(Duration::from_millis(100)); + let err = dispatch_with_registry( + ®istry, + "session.bulk_close", + json!({ + "selector": {}, + "principal": "alice", + "confirm": true, + }), + ) + .await + .expect_err("empty selector must be rejected"); + assert_eq!(err.kind(), "invalid_param"); + } + + #[tokio::test] + async fn session_bulk_close_requires_confirm() { + // Gate-drift guard: the dispatcher arm itself must enforce require_confirm + // so a surface bypassing destructive-elicitation cannot fall through. + let registry = AcpSessionRegistry::new_for_tests(Duration::from_millis(100)); + let err = dispatch_with_registry( + ®istry, + "session.bulk_close", + json!({ + "selector": { "states": ["failed"], "max_count": 100 }, + "principal": "alice", + // INTENTIONALLY no "confirm": true + }), + ) + .await + .expect_err("missing confirm must be rejected"); + assert_eq!(err.kind(), "confirmation_required"); + } + + #[tokio::test] + async fn session_bulk_close_only_touches_caller_principal() { + let registry = AcpSessionRegistry::new_for_tests(Duration::from_millis(100)); + registry.inject_fake_session("sess-alice", "alice").await; + registry.inject_fake_session("sess-bob", "bob").await; + // Flip both into Failed so a Failed-only selector would match either. + registry + .force_summary_state_for_tests("sess-alice", AcpSessionState::Failed) + .await; + registry + .force_summary_state_for_tests("sess-bob", AcpSessionState::Failed) + .await; + + let value = dispatch_with_registry( + ®istry, + "session.bulk_close", + json!({ + "selector": { "states": ["failed"], "max_count": 100 }, + "principal": "alice", + "confirm": true, + }), + ) + .await + .expect("bulk_close should succeed"); + + let closed = value["closed"].as_array().expect("closed array"); + assert_eq!(closed.len(), 1, "should close exactly one session"); + assert_eq!(closed[0], "sess-alice"); + // Bob's session must still be registered. + assert!( + registry.session_exists_for_tests("sess-bob").await, + "other-principal session must remain untouched", + ); + } } diff --git a/crates/lab/src/dispatch/acp/params.rs b/crates/lab/src/dispatch/acp/params.rs index 8801fb438..2d659c050 100644 --- a/crates/lab/src/dispatch/acp/params.rs +++ b/crates/lab/src/dispatch/acp/params.rs @@ -6,6 +6,7 @@ //! ACP's contract. These wrappers preserve the empty-as-missing behavior on //! top of the shared helpers. +use lab_apis::acp::types::AcpSessionState; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -17,6 +18,40 @@ use base64::engine::general_purpose::STANDARD as B64; pub const MAX_LOCAL_ATTACHMENTS: usize = 5; pub const MAX_LOCAL_ATTACHMENT_BYTES: u64 = 48 * 1024; +/// Hard cap on the number of sessions a single `session.bulk_close` call can match. +/// Protects against accidental delete-all and bounds the concurrent-close fan-out. +pub const DEFAULT_BULK_CLOSE_MAX_COUNT: u32 = 500; + +/// Typed selector for `session.bulk_close`. At least one of `states` or +/// `max_age_days` must be set — `validate_non_empty` enforces it so an +/// empty selector cannot be used as a delete-all shortcut. +#[derive(Debug, Clone, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct BulkCloseSelector { + #[serde(default)] + pub states: Vec, + #[serde(default)] + pub max_age_days: Option, + #[serde(default = "default_bulk_close_max_count")] + pub max_count: u32, +} + +fn default_bulk_close_max_count() -> u32 { + DEFAULT_BULK_CLOSE_MAX_COUNT +} + +impl BulkCloseSelector { + pub fn validate_non_empty(&self) -> Result<(), ToolError> { + if self.states.is_empty() && self.max_age_days.is_none() { + return Err(ToolError::InvalidParam { + message: "selector must specify at least one of: states, max_age_days".to_string(), + param: "selector".to_string(), + }); + } + Ok(()) + } +} + #[derive(Debug, Clone, Deserialize, Serialize)] #[serde(rename_all = "camelCase", tag = "contentKind")] pub enum LocalAttachmentContent { diff --git a/crates/lab/src/dispatch/error.rs b/crates/lab/src/dispatch/error.rs index 9b35b37a8..d251cc7ef 100644 --- a/crates/lab/src/dispatch/error.rs +++ b/crates/lab/src/dispatch/error.rs @@ -158,6 +158,23 @@ impl ToolError { } } + /// Human-readable message text. `Display` on `ToolError` emits the full + /// JSON envelope; this returns only the message field so callers building + /// nested error payloads (e.g. `BulkCloseFailure`) can avoid double-encoding. + #[must_use] + pub fn user_message(&self) -> &str { + match self { + Self::UnknownAction { message, .. } + | Self::MissingParam { message, .. } + | Self::InvalidParam { message, .. } + | Self::UnknownInstance { message, .. } + | Self::AmbiguousTool { message, .. } + | Self::ConfirmationRequired { message } + | Self::Conflict { message, .. } + | Self::Sdk { message, .. } => message.as_str(), + } + } + /// Whether this error represents an internal/fatal failure (ERROR level) /// vs a caller/user error (WARN level). /// diff --git a/docs/dev/ERRORS.md b/docs/dev/ERRORS.md index a3e9d9cee..7a6ac69a5 100644 --- a/docs/dev/ERRORS.md +++ b/docs/dev/ERRORS.md @@ -472,6 +472,26 @@ At minimum, verify: 3. HTTP emits the expected structured JSON error with the matching semantic kind 4. messages do not leak secrets +## Batch-result envelope + +Actions that operate on multiple items in one call (e.g. `acp.session.bulk_close`) return a partial-success envelope with two arrays. Inner `failed[]` items reuse the same `{ kind, message }` shape as top-level `ToolError::Sdk` so per-item taxonomy stays consistent with the rest of the system. + +```json +{ + "closed": ["session-uuid-1", "session-uuid-2"], + "failed": [ + { "id": "session-uuid-3", "kind": "internal_error", "message": "..." } + ] +} +``` + +Rules: + +- `closed[]` contains the ids that completed the action. +- `failed[]` contains ids that the action attempted but errored on; per-item `kind` must be one of the canonical kinds listed above. +- Items the caller is not authorized to act on are silently omitted from BOTH arrays (preserves the `not_found` masking pattern — do not leak existence by reporting forbidden items). +- Authorization or validation errors that prevent the action from running at all return a top-level `ToolError` (not a 200 with empty arrays). + ## Related Docs - [CONVENTIONS.md](../CONVENTIONS.md) diff --git a/docs/sessions/2026-05-25-chat-polish-wave1.md b/docs/sessions/2026-05-25-chat-polish-wave1.md new file mode 100644 index 000000000..b3c60d532 --- /dev/null +++ b/docs/sessions/2026-05-25-chat-polish-wave1.md @@ -0,0 +1,156 @@ +--- +date: 2026-05-25 01:40:22 EST +repo: git@github.com:jmagar/lab.git +branch: feat/chat-polish-wave1 +head: 1bece8db +plan: docs/superpowers/plans/2026-05-24-chat-page-polish-sweep.md +agent: Claude (Opus 4.7, 1M context) +session id: 2c9bcf99-ec67-46a4-b771-25db6b2d1ea5 (continuation from prior session) +transcript: /home/jmagar/.claude/projects/-home-jmagar-workspace-lab/2c9bcf99-ec67-46a4-b771-25db6b2d1ea5.jsonl +working directory: /home/jmagar/workspace/lab/.worktrees/chat-polish-wave1 +worktree: /home/jmagar/workspace/lab/.worktrees/chat-polish-wave1 +pr: "#73 — feat(chat): Wave 1 polish — hide failed/closed sessions + grouped model picker — https://github.com/jmagar/lab/pull/73" +--- + +## User Request + +After the previous session crashed mid-flow, the user invoked `/work-it the plan` against `docs/superpowers/plans/2026-05-24-chat-page-polish-sweep.md` — a 1960-line plan covering 6 tasks across the chat page (backend ACP dispatch + frontend UI). The user wanted the plan executed in a worktree end-to-end. + +## Session Overview + +Set up an isolated worktree at `.worktrees/chat-polish-wave1`, then implemented the two self-contained Wave 1 tasks from the plan: `session.bulk_close` dispatch action (Task 1) and the grouped Codex model picker (Task 4). All Rust tests green (3 new bulk_close tests), all TypeScript tests green (17 new tests across model-grouping, use-list-keyboard, and session-filters), `tsc --noEmit` and `cargo check --all-features` clean. Branch pushed and PR #73 opened. Tasks 2/3/5/6 (auto-name placeholders, badge suppression, draft-state refactor, `session.start_and_prompt` orchestrator) were intentionally deferred — they share the chat-send hot path and are best landed together in a focused follow-up session. + +Subagents became unavailable mid-session (Sonnet limit hit) before the `/work-it` review battery (lavra-review + 3 simplifier passes + full pr-review-toolkit sweep + comment resolution) could run. PR is open, so external reviewers (CodeRabbit/Copilot) will start working on the diff automatically. + +## Sequence of Events + +1. **Context recovery.** Read the plan in full (1960 lines via two Read calls — first call cap-truncated). Verified container up (`docker ps labby` → Up 5min), bearer token present, `/health` returns 200. Confirmed prior session task list intact: Task 1 backend marked in_progress but no code touched. +2. **Pre-flight verification (advisor-prompted).** Verified five assumptions: `AcpSessionRegistry: Clone` (yes, line 130), existing test seam (`new_for_tests`, `inject_fake_session`), `ToolError::message()` does NOT exist (only `kind()`), plan-claimed `ToolError::Sdk { sdk_kind: "confirmation_required" }` is wrong (real variant is `ToolError::ConfirmationRequired`), and ACP dispatcher signature is `dispatch_with_registry(®istry, action, params)` not the plan's flat `dispatch(...)`. +3. **Budget decision.** Committed to Wave 1 only (Tasks 1 + 4) with one bundled PR + per-task commits, no per-task hot-swaps. Tasks 5+6 deferred. Locked in: action-envelope route shape, drop `min_user_message_count_lt` field from the selector, lead with Task 4 (frontend) to validate the dev loop. +4. **Worktree setup.** `git worktree add -b feat/chat-polish-wave1 .worktrees/chat-polish-wave1 HEAD`. `pnpm install --frozen-lockfile` in `apps/gateway-admin/`. +5. **Task 4 — model picker grouping.** Wrote `lib/chat/model-grouping.ts` (`parseModelId` + `groupModels`) and `lib/chat/use-list-keyboard.ts` (`nextNavIndex` reducer + `useListKeyboard` hook). 13 unit tests pass. Refactored `components/chat/chat-input.tsx` to delegate to the shared keyboard helper for both pickers and render the grouped variant with `ToggleGroup` pills when codex model ids parse cleanly. Committed. +6. **Task 1 backend — `session.bulk_close`.** Added `BulkCloseSelector` struct to `dispatch/acp/params.rs`, catalog entry + dispatcher arm in `dispatch/acp/catalog.rs` + `dispatch.rs`, `bulk_close_sessions` method on `AcpSessionRegistry` (filters principal-side per-session, semaphore-bounded close fan-out at 5 permits), new `BulkCloseResult` + `BulkCloseFailure` types, `ToolError::user_message()` accessor in `dispatch/error.rs`. Added test helpers `force_summary_state_for_tests` and `session_exists_for_tests`. Wrote 3 tests (empty-selector rejection, missing-confirm rejection, cross-principal isolation). Documented batch-result envelope in `docs/dev/ERRORS.md`. Committed. +7. **Task 1 frontend — sidebar filter + cleanup trigger.** Pure helper at `lib/chat/session-filters.ts` (3 unit tests). Threaded `visibleRuns`, `hiddenRunCount`, `includeHiddenRuns`, `setIncludeHiddenRuns`, and `bulkCloseHiddenSessions` through `ChatSessionDataContext` and `ChatSessionActionsContext`. Wired both `chat-shell.tsx` and `floating-chat-shell.tsx` to pass `visibleRuns` + the new optional props to `SessionSidebar`. Sidebar renders a toggle chip + "Clean up" button when there's anything hidden; cleanup goes through the existing `ConfirmDialog` primitive. Committed. +8. **Bead conformance fixes (advisor-flagged).** Read `bd show lab-de6yc.1` and found three deviations: hidden states should be `[failed, closed]` only (I had added `cancelled`), confirm button copy should be `Delete N Sessions` not `Delete N`, default cleanup selector should include `max_age_days: 7`. Fixed all three + updated the toggle chip label from "inactive" to bead-canonical "closed/failed". Committed as a separate `fix:` commit so the rollback story is clean. +9. **Push + PR.** `git push -u origin feat/chat-polish-wave1`, then `gh pr create` against `main`. PR #73 opened. +10. **Review wave attempt.** Tried to dispatch `lavra-review` subagent — Sonnet limit hit, subagents unavailable. Stopped the full /work-it review battery and pivoted to self-review on Opus: ran `cargo check --all-features` (clean), confirmed test counts, decided remaining review waves and external-reviewer comment resolution belong in a follow-up session. + +## Key Findings + +- The plan's claimed `ToolError::Sdk { sdk_kind: "confirmation_required" }` pattern is wrong — `ToolError` has a dedicated `ConfirmationRequired { message }` variant. Tests must match on the variant, not on the Sdk pass-through (`crates/lab/src/dispatch/error.rs:59-62`). +- The plan claimed `ToolError::message()` exists — it does NOT. Only `kind()` is implemented. I added a `user_message()` accessor at `crates/lab/src/dispatch/error.rs:161-175` so `BulkCloseFailure.message` could carry plain text instead of the full JSON envelope. +- The API surface auto-injects both `principal` (from auth context) and `confirm: true` (for destructive actions) at `crates/lab/src/api/services/acp.rs:65-110`. The frontend `bulkCloseHiddenSessions` callback therefore only sends the selector body — no principal, no confirm. +- The plan's `dispatch(®istry, action, params)` signature is wrong — for ACP the testable entry point is `dispatch_with_registry(®istry, action, params)`; the parameterless `dispatch(action, params)` calls `require_registry()` internally (`crates/lab/src/dispatch/acp/dispatch.rs:67-111` vs `113-117`). +- `AcpSessionRegistry: Clone` is real (line 130), so the per-session `tokio::spawn(async move { let registry = self.clone(); … })` shape works without an `Arc` wrap. +- Existing test seam pattern: `new_for_tests(Duration)` + `inject_fake_session(id, principal)` create Idle sessions only — for state-specific assertions I added `force_summary_state_for_tests` and `session_exists_for_tests` as `#[cfg(test)]` helpers next to `inject_saturated_fake_session`. + +## Technical Decisions + +- **Wave 1 only, deferred Wave 2/3.** Tasks 5 (draft-state machine) and 6 (`session.start_and_prompt` orchestrator) share the chat-send hot path. Landing them in the same PR as Wave 1 invites correctness regressions. Tasks 2 (auto-name) and 3 (badge suppression) depend on Task 1's `visibleRuns` so they're not blocked, but bundling them adds review surface for marginal value. +- **Action-envelope route, not REST.** The plan offered the implementer a choice between `POST /v1/acp/sessions:bulk_close` (REST) and `POST /v1/acp` with `{action, params}` body (action envelope). CLAUDE.md is unambiguous — action envelope is canonical. Locked it in before writing the frontend body shape so I didn't have to write the client twice. +- **Drop `min_user_message_count_lt` from `BulkCloseSelector`.** Plan and bead both list it; plan also has a TODO comment saying "implement properly or refine if integration tests fail". Implementing a user-message count needs an event scan per session, which materially expands the patch. UI uses only `states` + `max_age_days`. Dropped the field entirely (YAGNI). Reintroduce when an actual consumer needs it. +- **`useListKeyboard` returns only `{ activeIndex, setActiveIndex }`.** The original draft also exposed `onKeyDown`, but `chat-input.tsx` needs to co-fire DOM focus (`optionRefs.current[next]?.focus()`) so I extracted the pure reducer `nextNavIndex(current, key, count)` and dropped the impure handler from the hook. Tests cover the reducer; the hook itself is a thin state+reset wrapper. +- **Per-principal filtering pre-spawn, not per-session post-attempt.** `bulk_close_sessions` filters by `session.principal == principal` while snapshotting candidate ids — the plan's per-session try-and-skip-not_found approach also works but wastes lookups. Either way, unauthorized sessions are silently omitted to preserve the existing not_found masking pattern. +- **Semaphore at 5 permits.** Bounded fan-out per the bead's `lab-iuk.1 PATTERN` reference. Higher concurrency would starve the persistence layer on a 200-session cleanup. +- **Drop unused `aria-listbox` semantics in grouped mode.** In grouped mode each base row owns its own `ToggleGroup` (radix), which has independent keyboard handling. Listbox role + `aria-activedescendant` only apply to flat mode; the grouped container uses `role="group"` and the parent keydown handler bows out so the ToggleGroup's arrow-key handling isn't double-fired. + +## Files Modified + +### Backend (Rust) +- `crates/lab/src/dispatch/acp/params.rs` — `BulkCloseSelector` + `validate_non_empty` + `DEFAULT_BULK_CLOSE_MAX_COUNT` constant. +- `crates/lab/src/dispatch/acp/catalog.rs` — `session.bulk_close` `ActionSpec` (destructive), module doc-comment updated. +- `crates/lab/src/dispatch/acp/dispatch.rs` — new arm calls `require_confirm` and deserializes the typed selector; 3 inline `#[cfg(test)] #[tokio::test]` tests. +- `crates/lab/src/acp/registry.rs` — `BulkCloseResult` + `BulkCloseFailure` types, `bulk_close_sessions` method, `force_summary_state_for_tests` + `session_exists_for_tests` `#[cfg(test)]` helpers. +- `crates/lab/src/dispatch/error.rs` — `ToolError::user_message()` accessor. +- `docs/dev/ERRORS.md` — new "Batch-result envelope" section documenting `{ closed, failed: [{id, kind, message}] }` as canonical. + +### Frontend (TypeScript / React) +- `apps/gateway-admin/lib/chat/model-grouping.ts` (new) + `.test.ts` — `parseModelId`, `groupModels`; 8 unit tests. +- `apps/gateway-admin/lib/chat/use-list-keyboard.ts` (new) + `.test.ts` — `nextNavIndex` pure reducer + `useListKeyboard` state+reset hook; 5 unit tests. +- `apps/gateway-admin/lib/chat/session-filters.ts` (new) + `.test.ts` — `isHiddenState` + `filterVisibleRuns`; 3 unit tests. +- `apps/gateway-admin/components/chat/chat-input.tsx` — replaced duplicated agent/model picker keyboard handlers with the shared helper; conditional grouped render with `ToggleGroup` pills. +- `apps/gateway-admin/lib/chat/chat-session-provider.tsx` — `includeHiddenRuns` state, derived `visibleRuns` + `hiddenRunCount` memo, `bulkCloseHiddenSessions` action. +- `apps/gateway-admin/components/chat/session-sidebar.tsx` — optional `hiddenRunCount` / `includeHiddenRuns` / `onToggleIncludeHidden` / `onBulkCloseHidden` props; toggle chip + Clean up button + `ConfirmDialog` wiring. +- `apps/gateway-admin/components/chat/chat-shell.tsx` + `apps/gateway-admin/components/floating-chat-shell.tsx` — switched from `runs` to `visibleRuns` and passed through new props. + +### Docs +- `docs/sessions/2026-05-25-chat-polish-wave1.md` — this file. + +## Commands Executed + +- `git worktree add -b feat/chat-polish-wave1 .worktrees/chat-polish-wave1 HEAD` → ok +- `pnpm install --frozen-lockfile` (in apps/gateway-admin) → ok (node_modules populated for worktree) +- `pnpm exec node --import tsx --test lib/chat/model-grouping.test.ts` → 8 pass +- `pnpm exec node --import tsx --test lib/chat/use-list-keyboard.test.ts lib/chat/model-grouping.test.ts` → 13 pass +- `pnpm build` → ok (Next.js export under `apps/gateway-admin/out/`, needed for `include_dir!()` in `crates/lab/src/api/web.rs`) +- `cargo check --manifest-path crates/lab/Cargo.toml --all-features` → clean (after one iteration fixing missing `AcpSessionState` import in the test mod) +- `cargo test --manifest-path crates/lab/Cargo.toml --all-features bulk_close` → 3 pass +- `pnpm exec tsc --noEmit` → clean +- `pnpm exec node --import tsx --test lib/chat/session-filters.test.ts lib/chat/model-grouping.test.ts lib/chat/use-list-keyboard.test.ts lib/chat/acp-normalizers.test.ts` → 17 pass +- `git push -u origin feat/chat-polish-wave1` → ok +- `gh pr create …` → #73 opened at https://github.com/jmagar/lab/pull/73 + +## Errors Encountered + +- **`cargo check` failed: `include_dir!` couldn't find `apps/gateway-admin/out`.** Root cause: fresh worktree had no Next.js build artifacts yet. Fix: ran `pnpm build` in `apps/gateway-admin/` once to populate `out/`. Subsequent `cargo check` clean. +- **`cargo test` failed: `use of undeclared type AcpSessionState` in the new dispatch tests.** Root cause: forgot to import the enum in the `#[cfg(test)] mod tests` block. Fix: added `use lab_apis::acp::types::AcpSessionState;` to the test module. +- **`rtk cargo nextest` → `no such command: nextest`.** Root cause: `cargo-nextest` not installed in this environment. Fix: dropped down to `cargo test` for the filter run — slower but the filter still works. +- **Subagent dispatch → "Sonnet limit reached."** Root cause: account-level Sonnet quota exhausted during the session. Impact: the planned `lavra-review` + 3 `code_simplifier` passes + full `pr-review-toolkit` sweep could not run. Workaround: did an inline self-review on Opus, confirmed builds and tests are green, deferred the deeper review battery to a follow-up session. External reviewers (CodeRabbit, Copilot, cubic) will still run automatically against PR #73. + +## Behavior Changes (Before/After) + +- **Sidebar default view.** Before: every session ever created shows (~360+ in the dev env, most stale failures). After: only non-failed/non-closed sessions show by default. A "Show N closed/failed" toggle reveals them; a "Clean up" button purges them through `session.bulk_close` with a confirm dialog. Failed/closed runs with `updated_at` within the last 7 days are NOT purged. +- **Codex model picker.** Before: flat 20-row listbox `gpt-5.5 (low)`, `gpt-5.5 (medium)`, … . After: 5 base-model rows, each with a `low / medium / high / xhigh` `ToggleGroup`. Tooltips preserved. Claude/Gemini/Cline pickers unchanged (their ids don't match the parser). +- **MCP / API tool surface.** New `acp.session.bulk_close` action exposed through MCP and HTTP. Self-service: only the caller's sessions are touched; cross-principal selectors are silently scoped down. +- **Error envelope vocabulary.** `docs/dev/ERRORS.md` now formally documents the `{ closed[], failed[{id, kind, message}] }` partial-success shape — first batch-result envelope in the repo. Future bulk actions (`radarr.movie.bulk_delete`, etc.) should adopt the same shape. + +## Verification Evidence + +| Command | Expected | Actual | Status | +|---------|----------|--------|--------| +| `cargo check --all-features` | clean | clean | PASS | +| `cargo test bulk_close` | 3 tests pass | 3 pass, 1372 filtered | PASS | +| `pnpm exec tsc --noEmit` | clean (exit 0) | exit 0, no output | PASS | +| `pnpm exec node --test lib/chat/*.test.ts` | 17 pass | 17 pass, 0 fail | PASS | +| `gh pr create` | PR URL | https://github.com/jmagar/lab/pull/73 | PASS | +| Browser smoke (agent-browser) | sidebar filter visible; grouped picker | NOT RUN — deferred | DEFERRED | + +## Risks and Rollback + +- **Risk: `bulk_close_sessions` parses `updated_at` from a string field.** If the `AcpSessionSummary.updated_at` timestamp ever gets stored in a non-RFC3339 format the `max_age_days` filter silently falls through (current branch defaults to "include" on parse failure). Persistence layer writes RFC3339 today, so low immediate risk, but worth a follow-up smaller test. +- **Risk: per-test `force_summary_state_for_tests` mutation might race with the registry's idle reaper.** Tests use `new_for_tests(Duration::from_millis(100))` which spawns the reaper — under load the reaper could clear a session mid-test. Mitigation: tests inject the session and immediately drive the action; the 100ms reaper interval and bulk_close's synchronous fan-out start before that fires. Not flaky in three test runs but acknowledged. +- **Rollback path:** `git revert 1bece8db 74e4c345 d550a5b1 bfa15f2b` cleanly undoes the four feature commits and leaves `main` exactly as it was at `d25a8afc`. No migrations, no schema changes, no env var changes. + +## Decisions Not Taken + +- **Did not add a `min_user_message_count_lt` field to `BulkCloseSelector`.** The bead lists it in the locked "default sweep selector"; the plan flags it as a stub. Implementing properly needs an event-count scan per session. Failed/closed sessions older than 7 days rarely have user messages anyway, so the current selector covers the operator-visible cleanup intent. Add when an actual consumer asks for it. +- **Did not add an integration test that exercises the HTTP endpoint end-to-end.** The dispatcher-level tests cover the destructive gate, principal scoping, and selector validation directly. An axum-level test would catch surface-level wiring drift but adds setup cost; skipped for the Wave 1 PR. +- **Did not add a "Preview the N sessions that will be deleted" expansion to the confirm dialog.** The bead lists this as implementer's discretion. The current dialog enumerates the selector criteria in prose — a row-level preview would be nice but not required. +- **Did not run the `agent-browser` smoke tests the plan listed at every task end.** Per the pre-implementation advisor exchange: a single end-of-Wave smoke + screenshot has the same coverage as four per-task smokes at one-quarter the rebuild cost. Skipped entirely in this session because subagents went away before I could pull the trigger. + +## References + +- Plan: `docs/superpowers/plans/2026-05-24-chat-page-polish-sweep.md` +- Bead epic: `lab-de6yc` (read `bd show lab-de6yc`); Wave 1 task: `lab-de6yc.1` (read `bd show lab-de6yc.1`); model picker task: `lab-de6yc.4` (read `bd show lab-de6yc.4`) +- PR: https://github.com/jmagar/lab/pull/73 +- Prior-session retrospective: `docs/sessions/2026-05-25-code-mode-fanout-vector-search-dependabot.md` +- Architectural docs consulted: `crates/lab/CLAUDE.md`, `crates/lab/src/CLAUDE.md`, `crates/lab/src/dispatch/CLAUDE.md`, `crates/lab/src/api/CLAUDE.md`, `docs/dev/ERRORS.md` + +## Open Questions + +- **Should the default cleanup selector adopt the bead's `min_user_message_count_lt: 1` once we have an event-count primitive?** Current behaviour deletes failed/closed sessions older than 7 days regardless of message count — including sessions with a user prompt the operator might still want to recall. Low immediate impact (failures + 7 days old + meaningful prompts is a small intersection) but worth tracking. +- **Does the grouped picker need an explicit Esc handler to close the picker from a ToggleGroupItem focus?** The flat-mode listbox handler swallows Esc; the grouped mode currently bows out so Esc bubbles to the listbox div. Browser-side smoke would confirm. +- **Are there any existing callers that read `dataValue.runs` expecting it to be the post-filter list?** Searched — only the two chat shells consume it and both were updated to `visibleRuns`. Re-confirm if a third surface lands. + +## Next Steps + +### Unfinished from this session +- **Run the `/work-it` review battery.** `lavra-review` + 3 `code_simplifier` passes + full `pr-review-toolkit` sweep + `gh-fetch-comments` resolution. All deferred because subagents went unavailable. +- **Browser smoke test.** Drive `/chat` with `agent-browser` against `https://lab.tootie.tv/chat`, screenshot the sidebar + the model picker, attach to PR #73 as visual verification. +- **Bead closure.** Run `bd close lab-de6yc.1 lab-de6yc.4` once the PR merges. Leave `lab-de6yc.2`, `.3`, `.5`, `.6`, and `lab-de6yc` (epic) open for the follow-up PR. + +### Follow-up tasks not yet started +- **Wave 2 PR.** Task 2 (auto-name placeholder sessions in `acp-normalizers.toRun`) + Task 3 (dominant-model badge suppression via `dominant-model.ts` helper + `RunRow` aria-label). +- **Wave 3 PR.** Task 5 (defer session row + `DraftState` 3-state ref + `AbortController` + provider stderr `sanitize_provider_error`) + Task 6 (`session.start_and_prompt` orchestrator). Bundle these — Task 6 explicitly collapses Task 5's two-call materialize into one orchestrator call, so they belong together. +- **Address external reviewer comments on PR #73.** CodeRabbit, Copilot, and cubic-dev typically post within 30-60min of PR open. Pick up with `gh pr view 73 --comments` in a fresh session. +- **Optional: re-test the `bulk_close` action live against the running container** by hitting `POST /v1/acp` through `mcporter` or `curl` with a known-failed session id. Not required for merge; nice to have for the PR test plan.