diff --git a/.changeset/openai-chat-image-attachments.md b/.changeset/openai-chat-image-attachments.md new file mode 100644 index 00000000..c444b021 --- /dev/null +++ b/.changeset/openai-chat-image-attachments.md @@ -0,0 +1,6 @@ +--- +"@open-codesign/core": patch +"@open-codesign/providers": patch +--- + +Forward image attachments through OpenAI-compatible and Anthropic-style provider paths instead of only marking Codex synthesized models as image-capable. diff --git a/apps/desktop/src/renderer/src/components/AddCustomProviderModal.tsx b/apps/desktop/src/renderer/src/components/AddCustomProviderModal.tsx index 5fa3e36d..c0b1f659 100644 --- a/apps/desktop/src/renderer/src/components/AddCustomProviderModal.tsx +++ b/apps/desktop/src/renderer/src/components/AddCustomProviderModal.tsx @@ -298,7 +298,7 @@ export function AddCustomProviderModal({ if (!editTarget.builtin) { const previous = editTarget.tlsRejectUnauthorized === true; if (previous !== tlsRejectUnauthorized) { - update.tlsRejectUnauthorized = tlsRejectUnauthorized ? true : false; + update.tlsRejectUnauthorized = !!tlsRejectUnauthorized; } } await window.codesign.config.updateProvider(update); diff --git a/biome.json b/biome.json index 7134594f..83d65646 100644 --- a/biome.json +++ b/biome.json @@ -1,5 +1,5 @@ { - "$schema": "https://biomejs.dev/schemas/2.4.14/schema.json", + "$schema": "https://biomejs.dev/schemas/2.4.15/schema.json", "vcs": { "enabled": true, "clientKind": "git", diff --git a/packages/core/src/agent.test.ts b/packages/core/src/agent.test.ts index ee438e1a..2a2a306e 100644 --- a/packages/core/src/agent.test.ts +++ b/packages/core/src/agent.test.ts @@ -1156,6 +1156,33 @@ describe('generateViaAgent()', () => { ); }); + it('passes image attachments through openai-compatible agent models', async () => { + scriptedAgent = { assistantText: RESPONSE_WITH_ARTIFACT }; + await generateViaAgent( + { + prompt: 'replicate this screenshot', + history: [], + model: { provider: 'custom-openai', modelId: 'local-text-or-vision-model' }, + apiKey: 'sk-test', + wire: 'openai-chat', + baseUrl: 'https://gateway.example.test/v1', + attachments: [ + { + name: 'shot.png', + path: 'references/shot.png', + mediaType: 'image/png', + imageDataUrl: 'data:image/png;base64,aW1n', + }, + ], + }, + { fs: makeStubFs({}) }, + ); + + expect(agentCalls[0]?.prompts[0]?.images).toEqual([ + { type: 'image', data: 'aW1n', mimeType: 'image/png' }, + ]); + }); + it('blocks preview and done until set_todos has run for fresh multi-step work', async () => { scriptedAgent = { assistantText: RESPONSE_WITH_ARTIFACT }; await generateViaAgent( diff --git a/packages/core/src/agent.ts b/packages/core/src/agent.ts index 7e307a59..e0e375b5 100644 --- a/packages/core/src/agent.ts +++ b/packages/core/src/agent.ts @@ -242,6 +242,9 @@ function supportsImageInput(wire: WireApi | undefined, modelId: string): boolean if (wire === 'anthropic' || wire === 'openai-responses' || wire === 'openai-codex-responses') { return true; } + if (wire === 'openai-chat') { + return true; + } const lower = modelId.toLowerCase(); return ( lower.includes('vision') || diff --git a/packages/providers/src/index.test.ts b/packages/providers/src/index.test.ts index d2711a49..c2ec14ec 100644 --- a/packages/providers/src/index.test.ts +++ b/packages/providers/src/index.test.ts @@ -361,6 +361,57 @@ describe('complete', () => { expect(result.content).toBe('ok'); }); + it('keeps image inputs for synthesized openai-chat models', async () => { + getModelMock.mockReturnValue(undefined); + completeSimpleMock.mockImplementationOnce(async (model, context) => { + expect(model).toMatchObject({ + api: 'openai-completions', + input: ['text', 'image'], + baseUrl: 'https://gateway.example.test/v1', + }); + expect(context.messages).toEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'use this screenshot' }, + { type: 'image', data: 'AAAA', mimeType: 'image/png' }, + ], + timestamp: 1, + }, + ]); + return { + role: 'assistant', + content: [{ type: 'text', text: 'ok' }], + api: 'openai-completions', + provider: 'custom-openai', + model: 'local-text-or-vision-model', + usage: { + input: 1, + output: 1, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 2, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: 'stop', + timestamp: Date.now(), + }; + }); + + const result = await complete( + { provider: 'custom-openai', modelId: 'local-text-or-vision-model' }, + [{ role: 'user', content: 'use this screenshot' }], + { + apiKey: 'sk-test', + wire: 'openai-chat', + baseUrl: 'https://gateway.example.test/v1', + userImages: [{ data: 'AAAA', mimeType: 'image/png' }], + }, + ); + + expect(result.content).toBe('ok'); + }); + it('synthesizes openai-chat PiModel with reasoning=false for Qwen DashScope (#183)', async () => { getModelMock.mockReturnValue(undefined); completeSimpleMock.mockImplementationOnce(async (model) => { diff --git a/packages/providers/src/index.ts b/packages/providers/src/index.ts index f2e18927..34cfdf70 100644 --- a/packages/providers/src/index.ts +++ b/packages/providers/src/index.ts @@ -308,7 +308,11 @@ function synthesizeWireModel( wire: GenerateOptions['wire'], baseUrl: string | undefined, ): PiModel { - const supportsImageInput = wire === 'openai-codex-responses'; + const supportsImageInput = + wire === 'anthropic' || + wire === 'openai-chat' || + wire === 'openai-responses' || + wire === 'openai-codex-responses'; const api = wire === 'anthropic' ? 'anthropic-messages'