From 48b5338fdecbabad57ff3baec15861d17cd0de4d Mon Sep 17 00:00:00 2001 From: Serhii Vecherenko Date: Mon, 15 Jun 2026 19:49:31 -0700 Subject: [PATCH] feat(chat): render inline tool-generated images in chat - Extract tool-result image blocks in ACP and Claude canonical mappers - Add ImageView with lightbox, clipboard copy, and save-as IPC handlers - Extend tool-call payload schema and keep image tool calls discrete on reload - Reuse ImageLightbox for shared fullscreen preview in chat and composer --- src/main/attachments/localFiles.ts | 5 + src/main/ipc/localHandlers.ts | 23 ++ .../components/composer/ImageLightbox.tsx | 41 ++- src/renderer/components/composer/index.ts | 2 +- .../thread/ChatPane/chatPaneSelectors.ts | 15 ++ .../parts/items/AssistantMessage.test.tsx | 59 ++++ .../ChatPane/parts/items/AssistantMessage.tsx | 23 +- .../ChatPane/parts/items/ChatItemRow.tsx | 9 +- .../ChatPane/parts/items/ImageView.test.tsx | 72 +++++ .../thread/ChatPane/parts/items/ImageView.tsx | 184 +++++++++++++ .../parts/items/imageViewSource.test.ts | 161 +++++++++++ .../ChatPane/parts/items/imageViewSource.ts | 253 ++++++++++++++++++ .../state/chatRuntimePersister.test.ts | 27 ++ src/renderer/state/chatRuntimePersister.ts | 14 + src/shared/contracts/runtimeEvent.ts | 8 + src/shared/ipc/procedureMap.ts | 2 + src/shared/ipc/procedures/app.ts | 12 + src/shared/ipc/schemas.ts | 12 + .../agents/acp/canonicalMapping.test.ts | 39 +++ src/supervisor/agents/acp/canonicalMapping.ts | 30 +++ .../agents/claude/sdkCanonicalMapping.test.ts | 48 ++++ .../agents/claude/sdkCanonicalMapping.ts | 42 ++- 22 files changed, 1070 insertions(+), 11 deletions(-) create mode 100644 src/renderer/components/thread/ChatPane/parts/items/AssistantMessage.test.tsx create mode 100644 src/renderer/components/thread/ChatPane/parts/items/ImageView.test.tsx create mode 100644 src/renderer/components/thread/ChatPane/parts/items/ImageView.tsx create mode 100644 
src/renderer/components/thread/ChatPane/parts/items/imageViewSource.test.ts create mode 100644 src/renderer/components/thread/ChatPane/parts/items/imageViewSource.ts diff --git a/src/main/attachments/localFiles.ts b/src/main/attachments/localFiles.ts index 0fba6dfb..a3fb3f54 100644 --- a/src/main/attachments/localFiles.ts +++ b/src/main/attachments/localFiles.ts @@ -26,6 +26,11 @@ export function saveClipboardImageFile( return filePath; } +/** Write raw image bytes to a user-chosen absolute path (download "Save as…"). */ +export function writeImageFile(filePath: string, data: Uint8Array): void { + writeFileSync(filePath, Buffer.from(data)); +} + export function saveHandoffContextFile( paths: LightcodePaths, payload: { threadId: string; content: string }, diff --git a/src/main/ipc/localHandlers.ts b/src/main/ipc/localHandlers.ts index e0954423..836cd1f8 100644 --- a/src/main/ipc/localHandlers.ts +++ b/src/main/ipc/localHandlers.ts @@ -28,6 +28,7 @@ import { resolveProjectFsPath, saveClipboardImageFile, saveHandoffContextFile, + writeImageFile, } from "../attachments/localFiles"; import { createProjectDirectory } from "../projectDirectory"; import { @@ -132,6 +133,28 @@ export function createLocalIpcHandlers( saveClipboardImageFile(options.requireLightcodePaths(), payload), saveHandoffContext: (payload) => saveHandoffContextFile(options.requireLightcodePaths(), payload), + saveImageFile: async ({ data, suggestedName }) => { + const win = options.getMainWindow(); + const result = await dialog.showSaveDialog(win!, { + title: "Save image", + defaultPath: suggestedName, + filters: [ + { name: "Images", extensions: ["png", "jpg", "jpeg", "gif", "webp", "svg", "bmp"] }, + ], + }); + if (result.canceled || !result.filePath) return null; + writeImageFile(result.filePath, data); + return result.filePath; + }, + copyImageToClipboard: ({ data }) => { + // `nativeImage.createFromBuffer` only decodes PNG/JPEG; the renderer + // converts other formats to PNG first. 
Report whether anything landed on + // the clipboard so the UI doesn't claim success on an empty image. + const image = nativeImage.createFromBuffer(Buffer.from(data)); + if (image.isEmpty()) return false; + clipboard.writeImage(image); + return true; + }, createProjectDirectory: (payload) => createProjectDirectory(payload), openExternal: async (url) => { const safeUrl = assertSafeExternalUrl(url); diff --git a/src/renderer/components/composer/ImageLightbox.tsx b/src/renderer/components/composer/ImageLightbox.tsx index 1c0858e9..86ebe028 100644 --- a/src/renderer/components/composer/ImageLightbox.tsx +++ b/src/renderer/components/composer/ImageLightbox.tsx @@ -1,13 +1,46 @@ -import { useEffect, useState } from "react"; +import { useEffect, useMemo, useState } from "react"; import { createPortal } from "react-dom"; import { ChevronLeft, ChevronRight, X } from "lucide-react"; import { toLocalFileUrl } from "@/shared/promptContent"; import type { Attachment } from "./useAttachments"; +/** A pre-resolved image for the lightbox: a renderable URL plus an accessible label. */ +export interface LightboxImage { + /** Renderable image URL — a `data:`, `lightcode-local://`, or remote URL. */ + src: string; + /** Accessible label / alt text. */ + alt?: string; +} + +/** + * Attachment-backed lightbox used by the composer surfaces. Resolves each + * attachment's local path to a renderable URL and defers to + * {@link ImageLightboxView}. + */ export function ImageLightbox(props: { images: Attachment[]; initialIndex: number; onClose: () => void; +}) { + const images = useMemo( + () => props.images.map((img) => ({ src: toLocalFileUrl(img.path), alt: img.name })), + [props.images], + ); + return ( + + ); +} + +/** + * Source-agnostic fullscreen image viewer. Accepts already-resolved image URLs + * (`data:`, `lightcode-local://`, remote) so it can be reused for chat-generated + * images as well as composer attachments. 
Supports keyboard nav and prev/next + * chrome for multi-image galleries; a single image renders without that chrome. + */ +export function ImageLightboxView(props: { + images: LightboxImage[]; + initialIndex: number; + onClose: () => void; }) { const { images, initialIndex, onClose } = props; const [index, setIndex] = useState(initialIndex); @@ -40,7 +73,7 @@ export function ImageLightbox(props: { onClick={onClose} role="dialog" aria-modal="true" - aria-label={current.name ?? "Image preview"} + aria-label={current.alt ?? "Image preview"} > +
+ + {source.alt} + + + + + setLightboxOpen(true)}> + + + +
+ {isLightboxOpen ? ( + setLightboxOpen(false)} + /> + ) : null} + + ); +} + +function CopyImageButton({ source }: { source: ImageViewSource }) { + const [copied, setCopied] = useState(false); + + async function onCopy() { + try { + const data = await toClipboardPngBytes(source); + const ok = await readBridge().copyImageToClipboard({ data }); + if (!ok) { + console.warn("Clipboard rejected the image (unsupported format)"); + return; + } + setCopied(true); + window.setTimeout(() => setCopied(false), 1500); + } catch (err) { + console.error("Failed to copy image to clipboard", err); + } + } + + return ( + + {copied ? : } + + ); +} + +function DownloadImageButton({ src, fileName }: { src: string; fileName: string }) { + async function onDownload() { + try { + const data = await fetchImageBytes(src); + await readBridge().saveImageFile({ data, suggestedName: fileName }); + } catch (err) { + console.error("Failed to save image", err); + } + } + + return ( + + + + ); +} + +function IconButton({ + label, + onClick, + children, +}: { + label: string; + onClick: () => void; + children: ReactNode; +}) { + return ( + + + + + {label} + + ); +} + +async function fetchImageBytes(src: string) { + const response = await fetch(src); + if (!response.ok) throw new Error(`Failed to load image (${response.status})`); + return new Uint8Array(await response.arrayBuffer()); +} + +/** + * Bytes to hand the OS clipboard. The native clipboard (Electron `nativeImage`) + * only decodes PNG/JPEG, so those pass straight through; other raster formats + * (GIF/WebP/BMP) are decoded and re-encoded to PNG via a canvas. If conversion + * isn't possible (e.g. an unsized SVG), the raw bytes are returned and the main + * process reports the empty-image case back so the UI doesn't fake success. 
+ */ +async function toClipboardPngBytes(source: ImageViewSource) { + if (source.mime === "image/png" || source.mime === "image/jpeg") { + return fetchImageBytes(source.src); + } + try { + const blob = await (await fetch(source.src)).blob(); + const bitmap = await createImageBitmap(blob); + const canvas = document.createElement("canvas"); + canvas.width = bitmap.width; + canvas.height = bitmap.height; + const ctx = canvas.getContext("2d"); + if (!ctx || canvas.width === 0 || canvas.height === 0) return fetchImageBytes(source.src); + ctx.drawImage(bitmap, 0, 0); + const pngBlob = await new Promise((resolve) => + canvas.toBlob(resolve, "image/png"), + ); + if (!pngBlob) return fetchImageBytes(source.src); + return new Uint8Array(await pngBlob.arrayBuffer()); + } catch { + return fetchImageBytes(source.src); + } +} diff --git a/src/renderer/components/thread/ChatPane/parts/items/imageViewSource.test.ts b/src/renderer/components/thread/ChatPane/parts/items/imageViewSource.test.ts new file mode 100644 index 00000000..ddcbaa7b --- /dev/null +++ b/src/renderer/components/thread/ChatPane/parts/items/imageViewSource.test.ts @@ -0,0 +1,161 @@ +import { describe, expect, it } from "vitest"; +import { + imageViewHasRenderableImage, + imageViewRendersInline, + imageViewSourceFromImageBlock, + resolveImageViewSource, +} from "./imageViewSource"; + +// A minimal valid 1x1 PNG, base64-encoded (starts with the PNG magic prefix). 
+const PNG_BASE64 = + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="; + +describe("resolveImageViewSource", () => { + it("resolves raw base64 PNG from a string result into a data URL", () => { + const source = resolveImageViewSource({ + name: "imageGeneration", + status: "success", + result: PNG_BASE64, + args: { prompt: "A red square" }, + }); + expect(source).not.toBeNull(); + expect(source?.src).toBe(`data:image/png;base64,${PNG_BASE64}`); + expect(source?.mime).toBe("image/png"); + expect(source?.extension).toBe("png"); + expect(source?.fileName).toBe("a-red-square.png"); + expect(source?.alt).toBe("A red square"); + }); + + it("prefers payload.images (the provider-agnostic channel) over result", () => { + // ACP / Claude mappers populate `images` with data: URLs. + const source = resolveImageViewSource({ + name: "generate_image", + status: "success", + result: "Generated an image of a red square.", + images: [`data:image/png;base64,${PNG_BASE64}`], + }); + expect(source?.src).toBe(`data:image/png;base64,${PNG_BASE64}`); + expect(source?.mime).toBe("image/png"); + }); + + it("passes through an existing data: URL result", () => { + const dataUrl = `data:image/png;base64,${PNG_BASE64}`; + const source = resolveImageViewSource({ name: "imageGeneration", result: dataUrl }); + expect(source?.src).toBe(dataUrl); + expect(source?.mime).toBe("image/png"); + }); + + it("strips whitespace/newlines from chunked base64", () => { + const chunked = `${PNG_BASE64.slice(0, 20)}\n${PNG_BASE64.slice(20)}`; + const source = resolveImageViewSource({ name: "imageGeneration", result: chunked }); + expect(source?.src).toBe(`data:image/png;base64,${PNG_BASE64}`); + }); + + it("reads base64 from an object result field (b64_json)", () => { + const source = resolveImageViewSource({ + name: "imageGeneration", + result: { b64_json: PNG_BASE64 }, + }); + expect(source?.src).toBe(`data:image/png;base64,${PNG_BASE64}`); + }); + + it("reads 
an image entry out of a result array", () => { + const source = resolveImageViewSource({ + name: "imageGeneration", + result: { data: [{ b64_json: PNG_BASE64 }] }, + }); + expect(source?.src).toBe(`data:image/png;base64,${PNG_BASE64}`); + }); + + it("detects a JPEG magic prefix", () => { + const jpeg = "/9j/4AAQSkZJRgABAQAAAQABAAD"; + const source = resolveImageViewSource({ name: "imageGeneration", result: jpeg }); + expect(source?.mime).toBe("image/jpeg"); + expect(source?.extension).toBe("jpg"); + }); + + it("does NOT render agent-supplied URLs or file paths (inline-only, no outbound requests)", () => { + // Remote URL → would be a tracking pixel / SSRF if auto-loaded. + expect( + resolveImageViewSource({ name: "imageGeneration", result: "https://attacker.example/p.png" }), + ).toBeNull(); + // file:// and lightcode-local:// → would read local files on view/copy. + expect( + resolveImageViewSource({ name: "imageGeneration", result: "file:///C:/secret.png" }), + ).toBeNull(); + expect( + resolveImageViewSource({ + name: "imageGeneration", + result: { url: "lightcode-local://local/C:/Users/me/secret.png" }, + }), + ).toBeNull(); + // A filesystem path on args is no longer promoted to an image. 
+ expect( + resolveImageViewSource({ name: "ViewImage", args: { path: "/tmp/pic.png" } }), + ).toBeNull(); + }); + + it("returns null for non-image text results", () => { + expect( + resolveImageViewSource({ + name: "imageGeneration", + result: "Here is your image description.", + }), + ).toBeNull(); + expect(resolveImageViewSource({ name: "imageGeneration", status: "running" })).toBeNull(); + expect(resolveImageViewSource(undefined)).toBeNull(); + }); + + it("falls back to a generic filename/alt when no prompt is present", () => { + const source = resolveImageViewSource({ name: "imageGeneration", result: PNG_BASE64 }); + expect(source?.alt).toBe("Generated image"); + expect(source?.fileName).toBe("generated-image.png"); + }); +}); + +describe("imageViewHasRenderableImage", () => { + it("agrees with resolveImageViewSource on image presence", () => { + expect(imageViewHasRenderableImage({ name: "imageGeneration", result: PNG_BASE64 })).toBe(true); + expect(imageViewHasRenderableImage({ name: "imageGeneration", result: "just text" })).toBe( + false, + ); + expect(imageViewHasRenderableImage(undefined)).toBe(false); + }); +}); + +describe("imageViewSourceFromImageBlock", () => { + it("builds a source from a canonical assistant-message image block", () => { + const source = imageViewSourceFromImageBlock({ + dataUrl: `data:image/png;base64,${PNG_BASE64}`, + mimeType: "image/png", + name: "diagram", + }); + expect(source?.src).toBe(`data:image/png;base64,${PNG_BASE64}`); + expect(source?.mime).toBe("image/png"); + expect(source?.alt).toBe("diagram"); + expect(source?.fileName).toBe("diagram.png"); + }); + + it("returns null for a non-image / missing data URL", () => { + expect(imageViewSourceFromImageBlock({ dataUrl: "https://example.com/x.png" })).toBeNull(); + expect(imageViewSourceFromImageBlock({ dataUrl: "" })).toBeNull(); + expect(imageViewSourceFromImageBlock({})).toBeNull(); + }); +}); + +describe("imageViewRendersInline", () => { + it("is true only for a 
non-errored payload that carries a renderable image", () => { + expect(imageViewRendersInline({ name: "imageGeneration", result: PNG_BASE64 })).toBe(true); + expect( + imageViewRendersInline({ name: "imageGeneration", status: "success", result: PNG_BASE64 }), + ).toBe(true); + }); + + it("is false when the tool errored even if a renderable image is present", () => { + // Mirrors ImageView falling back to the tool-call accordion on error, so + // the grouping selector keeps the row grouped instead of un-grouping it. + expect( + imageViewRendersInline({ name: "imageGeneration", status: "error", result: PNG_BASE64 }), + ).toBe(false); + }); +}); diff --git a/src/renderer/components/thread/ChatPane/parts/items/imageViewSource.ts b/src/renderer/components/thread/ChatPane/parts/items/imageViewSource.ts new file mode 100644 index 00000000..85b448fa --- /dev/null +++ b/src/renderer/components/thread/ChatPane/parts/items/imageViewSource.ts @@ -0,0 +1,253 @@ +/** + * Resolve a renderable image out of an `image_view` tool-call payload. + * + * Agents that generate images (e.g. Codex's `imageGeneration`, Claude image + * tools) carry the picture inline on the tool-call `result` — usually as raw + * base64 PNG, sometimes as a `data:` URL or a `{ image | b64_json | ... }` + * object. This module turns that into a single {@link ImageViewSource} the + * renderer can drop straight into an `<img>` — or `null` when the payload has + * no usable image yet (still running, errored, or genuinely not an image), in + * which case the row falls back to the generic tool-call accordion. + * + * Security note: we deliberately resolve ONLY self-contained inline images + * (`data:` URLs, magic-detected base64, raw `<svg>`). 
We never promote an + * agent-supplied `http(s)://`, `file://`, `lightcode-local://`, or filesystem + * path into an `<img>` — that would let a malicious/prompt-injected tool + * result trigger an outbound request (tracking pixel / SSRF) or read a local + * file on the user's machine simply because the user viewed the thread. Such + * payloads fall back to the inert tool-call accordion. + * + * `imageViewHasRenderableImage` is the cheap O(1) probe used on the hot timeline + * selector path (grouping): it never allocates the multi-MB data URL — it only + * inspects a short prefix. `resolveImageViewSource` does the full build and is + * memoized per item by the component. + */ + +export interface ImageViewSource { + /** Renderable inline image URL: a `data:` URL (base64) or an svg `data:` URL. */ + src: string; + /** Image MIME type, e.g. `"image/png"`. */ + mime: string; + /** Lower-case file extension without the dot, e.g. `"png"`. */ + extension: string; + /** Suggested file name for downloads, e.g. `"generated-image.png"`. */ + fileName: string; + /** Accessible label / alt text — the prompt when available, else a generic label. */ + alt: string; +} + +const EXTENSION_BY_MIME: Record = { + "image/png": "png", + "image/jpeg": "jpg", + "image/gif": "gif", + "image/webp": "webp", + "image/bmp": "bmp", + "image/svg+xml": "svg", +}; + +/** + * Base64 magic prefixes → MIME. Prefixes are long enough to be unambiguous so a + * prefix match is a reliable "this string is an encoded image" signal without + * decoding the (potentially multi-MB) body. + */ +const BASE64_IMAGE_SIGNATURES: ReadonlyArray = [ + ["iVBORw0KGgo", "image/png"], + ["/9j/", "image/jpeg"], + ["R0lGOD", "image/gif"], + ["UklGR", "image/webp"], // RIFF container + ["PHN2Zw", "image/svg+xml"], // "`-ready source, or `null` when there's no image. 
*/ +export function resolveImageViewSource(payload: unknown): ImageViewSource | null { + const found = findClassifiedCandidate(payload); + if (!found) return null; + const { value, classification } = found; + const src = buildSrc(value, classification); + if (!src) return null; + const mime = classification.mime; + const extension = EXTENSION_BY_MIME[mime] ?? "png"; + const alt = readPromptText(payload) ?? "Generated image"; + return { src, mime, extension, fileName: buildFileName(alt, extension), alt }; +} + +function findClassifiedCandidate( + payload: unknown, +): { value: string; classification: Classification } | null { + if (!payload || typeof payload !== "object") return null; + const record = payload as Record; + // `payload.images` is the explicit, provider-agnostic channel the agent + // mappers populate with renderable `data:` URLs (ACP/Claude image content + // blocks). Prefer it over sniffing `result`. + if (Array.isArray(record.images)) { + for (const value of record.images) { + if (typeof value !== "string" || value.length === 0) continue; + const classification = classifyCandidate(value); + if (classification) return { value, classification }; + } + } + for (const value of collectResultCandidates(record.result)) { + const classification = classifyCandidate(value); + if (classification) return { value, classification }; + } + return null; +} + +/** + * Build an {@link ImageViewSource} from a canonical assistant-message image + * content block (`{ kind: "image", dataUrl, mimeType?, name? }`). Returns null + * when the data URL isn't a recognizable inline image. 
+ */ +export function imageViewSourceFromImageBlock(block: { + dataUrl?: unknown; + mimeType?: unknown; + name?: unknown; +}): ImageViewSource | null { + if (typeof block.dataUrl !== "string" || block.dataUrl.length === 0) return null; + const classification = classifyCandidate(block.dataUrl); + if (!classification) return null; + const src = buildSrc(block.dataUrl, classification); + if (!src) return null; + const mime = + typeof block.mimeType === "string" && block.mimeType.startsWith("image/") + ? block.mimeType + : classification.mime; + const extension = EXTENSION_BY_MIME[mime] ?? "png"; + const alt = + typeof block.name === "string" && block.name.trim().length > 0 ? block.name.trim() : "Image"; + return { src, mime, extension, fileName: buildFileName(alt, extension), alt }; +} + +function collectResultCandidates(result: unknown): string[] { + if (typeof result === "string") return result.length > 0 ? [result] : []; + if (!result || typeof result !== "object") return []; + const record = result as Record; + const out: string[] = []; + for (const key of RESULT_STRING_KEYS) { + const value = record[key]; + if (typeof value === "string" && value.length > 0) out.push(value); + } + for (const key of RESULT_ARRAY_KEYS) { + const value = record[key]; + if (!Array.isArray(value)) continue; + for (const entry of value) { + if (typeof entry === "string" && entry.length > 0) out.push(entry); + else if (entry && typeof entry === "object") { + const inner = entry as Record; + for (const innerKey of RESULT_STRING_KEYS) { + const candidate = inner[innerKey]; + if (typeof candidate === "string" && candidate.length > 0) out.push(candidate); + } + } + } + } + return out; +} + +function classifyCandidate(value: string): Classification | null { + const trimmedHead = value.slice(0, 16).trimStart(); + if (/^data:image\//i.test(trimmedHead)) { + return { kind: "dataUrl", mime: parseDataUrlMime(value) }; + } + if (/^]/i.test(trimmedHead) || /^<\?xml/i.test(trimmedHead)) { + return { 
kind: "rawSvg", mime: "image/svg+xml" }; + } + for (const [prefix, mime] of BASE64_IMAGE_SIGNATURES) { + if (value.startsWith(prefix)) return { kind: "base64", mime }; + } + return null; +} + +function buildSrc(value: string, classification: Classification): string | null { + switch (classification.kind) { + case "dataUrl": + return value; + case "rawSvg": + return `data:image/svg+xml;utf8,${encodeURIComponent(value.trim())}`; + case "base64": { + const clean = value.replace(/\s+/g, ""); + return clean.length > 0 ? `data:${classification.mime};base64,${clean}` : null; + } + } +} + +function parseDataUrlMime(value: string): string { + const match = /^\s*data:([^;,]+)[;,]/i.exec(value); + const mime = match?.[1]?.toLowerCase(); + return mime && mime.startsWith("image/") ? mime : "image/png"; +} + +function readStatus(payload: unknown): string | undefined { + if (!payload || typeof payload !== "object") return undefined; + const status = (payload as Record).status; + return typeof status === "string" ? status : undefined; +} + +function readPromptText(payload: unknown): string | undefined { + if (!payload || typeof payload !== "object") return undefined; + const record = payload as Record; + const args = record.args; + if (args && typeof args === "object" && !Array.isArray(args)) { + const argsRecord = args as Record; + for (const key of ["prompt", "text", "description", "caption", "query"]) { + const value = argsRecord[key]; + if (typeof value === "string" && value.trim().length > 0) return value.trim(); + } + } + const title = record.title; + if (typeof title === "string" && title.trim().length > 0) return title.trim(); + return undefined; +} + +function buildFileName(alt: string, extension: string): string { + const slug = alt + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+|-+$/g, "") + .slice(0, 48); + const base = slug.length > 0 ? 
slug : "generated-image"; + return `${base}.${extension}`; +} diff --git a/src/renderer/state/chatRuntimePersister.test.ts b/src/renderer/state/chatRuntimePersister.test.ts index 9dafb292..1273e3b2 100644 --- a/src/renderer/state/chatRuntimePersister.test.ts +++ b/src/renderer/state/chatRuntimePersister.test.ts @@ -146,6 +146,33 @@ describe("prepareRuntimeSnapshotForPersistence", () => { ]); }); + it("keeps an image-bearing tool call discrete so the image survives reload", () => { + // A PNG base64 prefix — recognized as a renderable inline image. + const imagePayload = { + name: "imageGeneration", + status: "success", + result: "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwAD", + }; + const snapshot = prepareRuntimeSnapshotForPersistence( + [ + makeItem({ + id: "command-1", + type: "command_execution", + payload: { command: "ls", exitCode: 0 }, + }), + makeItem({ id: "image-1", type: "image_view", payload: imagePayload }), + ], + [], + ); + + const ids = snapshot.items.map((item) => item.id); + // The image is NOT folded into a "1 command, 1 tool" summary (which would + // strip the payload); it stays a discrete row with its image intact. 
+ expect(ids).toEqual(["command-1", "image-1"]); + expect(snapshot.items.find((item) => item.id === "image-1")?.payload).toEqual(imagePayload); + expect(ids.some((id) => id.startsWith("tool-call-summary:"))).toBe(false); + }); + it("keeps dropped-anchor markers attached to the previous surviving row", () => { const snapshot = prepareRuntimeSnapshotForPersistence( [ diff --git a/src/renderer/state/chatRuntimePersister.ts b/src/renderer/state/chatRuntimePersister.ts index 9408c2dc..b15d133a 100644 --- a/src/renderer/state/chatRuntimePersister.ts +++ b/src/renderer/state/chatRuntimePersister.ts @@ -1,5 +1,6 @@ import type { ThreadContextUsage, ToolCallPayload } from "@/shared/contracts"; import { captureRendererException } from "../diagnostics/sentry"; +import { imageViewRendersInline } from "../components/thread/ChatPane/parts/items/imageViewSource"; import { isSubAgentTool } from "../components/thread/ChatPane/parts/items/toolDisplay"; import { readBridge } from "../bridge"; import { useAppStore } from "./appStore"; @@ -352,6 +353,19 @@ function isToolGroupItem(item: RuntimeChatItem): boolean { if (item.type === "tool_call" && isSubAgentTool(item.payload as ToolCallPayload | undefined)) { return false; } + // Tool rows that render as a standalone inline image (ImageView) must NOT be + // folded into a "N tools" summary: `summarizeToolCallRun` keeps only a name + + // status, which would strip the image off the payload and lose it on reload. + // Keep them as discrete rows so the picture survives hydration. 
+ if ( + (item.type === "tool_call" || + item.type === "mcp_tool_call" || + item.type === "image_view" || + item.type === "dynamic_tool_call") && + imageViewRendersInline(item.payload) + ) { + return false; + } return ( item.type === "tool_call" || item.type === "mcp_tool_call" || diff --git a/src/shared/contracts/runtimeEvent.ts b/src/shared/contracts/runtimeEvent.ts index a144b380..d3849638 100644 --- a/src/shared/contracts/runtimeEvent.ts +++ b/src/shared/contracts/runtimeEvent.ts @@ -185,6 +185,14 @@ export const toolCallPayloadSchema = z.object({ serverId: z.string().optional(), args: z.unknown().optional(), result: z.unknown().optional(), + /** + * Inline images produced by the tool, as renderable `data:` URLs. Populated by + * the agent mappers when a tool result carries image content blocks (ACP + * `{ type: "image", data, mimeType }`, Claude `{ type: "image", source: { … } }`) + * so the renderer can show them inline. Codex's `imageGeneration` instead + * carries its base64 on `result`; both are handled by the renderer. 
+ */ + images: z.array(z.string()).optional(), status: toolCallStatusSchema, progress: toolCallProgressSchema.optional(), isSubAgent: z.boolean().optional(), diff --git a/src/shared/ipc/procedureMap.ts b/src/shared/ipc/procedureMap.ts index 0bf9bd0c..26ac0e24 100644 --- a/src/shared/ipc/procedureMap.ts +++ b/src/shared/ipc/procedureMap.ts @@ -57,6 +57,8 @@ export const MAIN_LOCAL_PROCEDURE_NAMES = [ "pickFiles", "saveClipboardImage", "saveHandoffContext", + "saveImageFile", + "copyImageToClipboard", "createProjectDirectory", "openExternal", "openExternalNative", diff --git a/src/shared/ipc/procedures/app.ts b/src/shared/ipc/procedures/app.ts index 54990c32..7762d43c 100644 --- a/src/shared/ipc/procedures/app.ts +++ b/src/shared/ipc/procedures/app.ts @@ -3,11 +3,13 @@ import type { ProjectLocation } from "../../contracts"; import type { KeybindingsConfig } from "../../keybindings"; import { defineIpcProcedure, defineNoArgProcedure, definePayloadProcedure } from "../core"; import { + copyImageToClipboardPayloadSchema, createProjectDirectoryPayloadSchema, openExternalPayloadSchema, pickFilesOptionsSchema, saveClipboardImagePayloadSchema, saveHandoffContextPayloadSchema, + saveImageFilePayloadSchema, type CreateProjectDirectoryResult, } from "../schemas"; @@ -36,6 +38,16 @@ export const appProcedures = { string, "main-local" >("saveHandoffContext", "main-local", saveHandoffContextPayloadSchema), + saveImageFile: definePayloadProcedure< + z.infer, + string | null, + "main-local" + >("saveImageFile", "main-local", saveImageFilePayloadSchema), + copyImageToClipboard: definePayloadProcedure< + z.infer, + boolean, + "main-local" + >("copyImageToClipboard", "main-local", copyImageToClipboardPayloadSchema), createProjectDirectory: definePayloadProcedure< z.infer, CreateProjectDirectoryResult, diff --git a/src/shared/ipc/schemas.ts b/src/shared/ipc/schemas.ts index 2bcafd3c..72f4a0f2 100644 --- a/src/shared/ipc/schemas.ts +++ b/src/shared/ipc/schemas.ts @@ -33,6 +33,18 @@ 
export const saveHandoffContextPayloadSchema = z.object({ content: z.string(), }); +export const saveImageFilePayloadSchema = z.object({ + /** Raw image bytes to write to the user-chosen path. */ + data: z.instanceof(Uint8Array), + /** Default file name shown in the Save dialog (e.g. `"generated-image.png"`). */ + suggestedName: z.string().min(1), +}); + +export const copyImageToClipboardPayloadSchema = z.object({ + /** Raw image bytes to place on the OS clipboard as an image. */ + data: z.instanceof(Uint8Array), +}); + export const createProjectDirectoryPayloadSchema = z.object({ /** Absolute parent directory (native path, or a `\\wsl...` UNC path). */ parent: z.string().min(1), diff --git a/src/supervisor/agents/acp/canonicalMapping.test.ts b/src/supervisor/agents/acp/canonicalMapping.test.ts index 1e6af64e..69c3759c 100644 --- a/src/supervisor/agents/acp/canonicalMapping.test.ts +++ b/src/supervisor/agents/acp/canonicalMapping.test.ts @@ -191,6 +191,45 @@ describe("mapAcpSessionUpdate", () => { expect(state.toolCallItems.has("tc-1")).toBe(false); }); + it("preserves inline image content from a tool result onto payload.images", () => { + const state = createAcpMapperState("t-image"); + mapAcpSessionUpdate( + note({ + sessionUpdate: "tool_call", + toolCallId: "tc-img", + title: "generate_image", + kind: "other", + status: "in_progress", + rawInput: { prompt: "a red square" }, + } as Parameters[0]["update"]), + state, + ); + + const completed = mapAcpSessionUpdate( + note({ + sessionUpdate: "tool_call_update", + toolCallId: "tc-img", + status: "completed", + content: [ + { + type: "content", + content: { type: "image", data: "iVBORw0KGgo=", mimeType: "image/png" }, + }, + ], + } as Parameters[0]["update"]), + state, + ); + + // A terminal tool_call_update seals the row on `item.completed`, carrying + // the final payload. 
The base64 image survives onto payload.images as a + // renderable data URL so the chat row can show it inline (the text-only + // extractor would drop it). + const sealed = completed.find((e) => e.type === "item.completed") as + | { payload?: Record } + | undefined; + expect(sealed?.payload?.images).toEqual(["data:image/png;base64,iVBORw0KGgo="]); + }); + it("falls back to the tool title for command_execution when rawInput.command is missing", () => { // Gemini's ACP run_shell_command tool emits `kind: "execute"` with the // command in `title` instead of `rawInput.command`. Without the fallback diff --git a/src/supervisor/agents/acp/canonicalMapping.ts b/src/supervisor/agents/acp/canonicalMapping.ts index 46249aa2..c0ca8ffd 100644 --- a/src/supervisor/agents/acp/canonicalMapping.ts +++ b/src/supervisor/agents/acp/canonicalMapping.ts @@ -592,12 +592,14 @@ function buildAcpToolCallPayload( const locations = extractToolLocations(toolCall.locations); const name = title ?? kind ?? "tool"; const contentResult = extractToolCallContentText(toolCall.content, resolveTerminalOutput); + const images = extractToolCallContentImages(toolCall.content); const subAgentModel = isSubAgent ? readStringField(toolCall.rawInput, "model") : undefined; const base: Record = { name, args: toolCall.rawInput, status, ...(contentResult !== undefined ? { result: contentResult } : {}), + ...(images.length > 0 ? { images } : {}), ...(title ? { title } : {}), ...(kind ? { kind } : {}), ...(locations.length > 0 ? { locations } : {}), @@ -681,6 +683,7 @@ function buildAcpToolCallUpdatePayload( resolveTerminalOutput, item.terminalId, ); + const images = extractToolCallContentImages(toolCall.content); const isFileChange = item.itemType === "file_change"; const contentDiffs = isFileChange ? extractAcpFileChangesFromContent(toolCall.content) : []; const contentDiffText = isFileChange ? 
/**
 * Collect inline images from an ACP tool result's `ToolCallContent[]` as
 * renderable `data:` URLs.
 *
 * ACP wraps each image as
 * `{ type: "content", content: { type: "image", data, mimeType } }`, where
 * `data` is the base64 payload. Only entries of exactly that shape with a
 * non-empty string `data` are honored; every other entry (text blocks,
 * `uri`-only image references, malformed values) is skipped.
 *
 * @param content - Raw `content` value of the tool call / tool call update.
 *   Anything that is not an array yields no images.
 * @returns One `data:<mime>;base64,<data>` URL per inline image, in input
 *   order. Empty when there is nothing to show.
 */
function extractToolCallContentImages(content: unknown): string[] {
  if (!Array.isArray(content)) return [];
  const images: string[] = [];
  for (const entry of content) {
    if (!entry || typeof entry !== "object") continue;
    const wrapper = entry as Record<string, unknown>;
    if (wrapper.type !== "content") continue;
    const inner = wrapper.content;
    if (!inner || typeof inner !== "object") continue;
    const block = inner as Record<string, unknown>;
    if (block.type !== "image") continue;
    if (typeof block.data !== "string" || block.data.length === 0) continue;
    // A blank media type would produce `data:;base64,…`, which a data URL
    // consumer reads as text/plain instead of an image. Treat a blank
    // `mimeType` the same as a missing one and fall back to PNG.
    const declaredMime = typeof block.mimeType === "string" ? block.mimeType.trim() : "";
    const mime = declaredMime.length > 0 ? declaredMime : "image/png";
    images.push(`data:${mime};base64,${block.data}`);
  }
  return images;
}
Gemini's ACP server passes the diff --git a/src/supervisor/agents/claude/sdkCanonicalMapping.test.ts b/src/supervisor/agents/claude/sdkCanonicalMapping.test.ts index 9c1c8e8d..74891058 100644 --- a/src/supervisor/agents/claude/sdkCanonicalMapping.test.ts +++ b/src/supervisor/agents/claude/sdkCanonicalMapping.test.ts @@ -1076,6 +1076,54 @@ describe("sdkCanonicalMapping — tool use", () => { ]); }); + it("preserves image content blocks from a tool result onto payload.images", () => { + const state = createClaudeMapperState("thread-1"); + mapClaudeSdkMessage( + streamEvent({ + type: "content_block_start", + index: 0, + content_block: { + type: "tool_use", + id: "toolu_img", + name: "generate_picture", + input: { prompt: "a cat" }, + }, + }), + state, + ); + + const events = mapClaudeSdkMessage( + { + type: "user", + session_id: "claude-session", + parent_tool_use_id: null, + message: { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_img", + content: [ + { + type: "image", + source: { type: "base64", media_type: "image/png", data: "iVBORw0KGgo=" }, + }, + ], + }, + ], + }, + } as unknown as SDKMessage, + state, + ); + + const updated = events.find((e) => e.type === "item.updated") as + | { payload: Record } + | undefined; + // The text-only extractor would drop the image; it must survive onto + // payload.images as a renderable data URL so the chat row shows it inline. 
/**
 * Collect inline images out of a Claude `tool_result` content value as
 * renderable `data:` URLs.
 *
 * Recognized blocks have the Anthropic image shape
 * `{ type: "image", source: { type: "base64", media_type, data } }`. The value
 * is walked recursively: arrays are visited element by element, and any
 * non-image object is descended into through its `content` property. Only
 * inline base64 sources with non-empty string `data` are honored; other
 * source kinds (e.g. remote `url`) are skipped.
 *
 * A missing or blank `media_type` falls back to `image/png` rather than
 * dropping the image or emitting a `data:;base64,…` URL with no media type.
 *
 * @param value - The `content` of a `tool_result` block (array, object, or
 *   any other value; non-object values yield no images).
 * @returns One `data:<media_type>;base64,<data>` URL per inline image, in
 *   traversal order.
 */
function extractToolResultImages(value: unknown): string[] {
  const images: string[] = [];

  const visit = (node: unknown): void => {
    if (Array.isArray(node)) {
      for (const entry of node) visit(entry);
      return;
    }
    if (!node || typeof node !== "object") return;
    const obj = node as Record<string, unknown>;

    if (obj.type !== "image") {
      // Not an image block: images may still be nested under `content`.
      if (obj.content !== undefined) visit(obj.content);
      return;
    }

    // Image blocks are leaves; never descend further from here.
    const source = obj.source;
    if (!source || typeof source !== "object") return;
    const src = source as Record<string, unknown>;
    if (src.type !== "base64") return;
    if (typeof src.data !== "string" || src.data.length === 0) return;

    const declaredType = typeof src.media_type === "string" ? src.media_type.trim() : "";
    const mediaType = declaredType.length > 0 ? declaredType : "image/png";
    images.push(`data:${mediaType};base64,${src.data}`);
  };

  visit(value);
  return images;
}
{ kind } : {}), args: tool.input, result, + ...(images && images.length > 0 ? { images } : {}), status, ...(tool.progress ? { progress: tool.progress } : {}), ...(tool.toolName === "Workflow" ? { isSubAgent: true } : {}), @@ -1689,6 +1728,7 @@ function mapClaudeSdkMessageInner( const tool = state.toolItemsById.get(toolUseId); if (!tool) continue; const text = extractText(obj.content); + const images = extractToolResultImages(obj.content); if (tool.planAggregatorRole) { if (tool.planAggregatorRole === "TaskCreate" && text.length > 0) { bindTaskCreateResult(state, tool, text); @@ -1730,7 +1770,7 @@ function mapClaudeSdkMessageInner( itemId: tool.itemId, payload: hasToolCallPayload(tool.itemType) || tool.itemType === "file_change" - ? toolPayload(tool, isError ? "error" : "success", text) + ? toolPayload(tool, isError ? "error" : "success", text, images) : toolPayload(tool, isError ? "error" : "success"), }); events.push({ type: "item.completed", threadId: state.threadId, itemId: tool.itemId });