From ff5dde422ebfe08a4a36fef250553dccda41fdfb Mon Sep 17 00:00:00 2001 From: Marius Wichtner Date: Tue, 12 May 2026 12:00:07 +0200 Subject: [PATCH 1/5] fix(gateway): strip embedding encoding format upstream --- .../lib/ai-gateway/embeddings/embedding-request.test.ts | 4 ++-- .../src/lib/ai-gateway/embeddings/embedding-request.ts | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.test.ts b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.test.ts index 08d3891a8..e8ec4561c 100644 --- a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.test.ts +++ b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.test.ts @@ -2,7 +2,7 @@ import { describe, it, expect } from '@jest/globals'; import { buildUpstreamBody } from './embedding-request'; describe('buildUpstreamBody', () => { - it('should forward supported fields and strip client-only, Mistral-specific, and deprecated fields', () => { + it('should forward supported fields and strip client-only, SDK-only, Mistral-specific, and deprecated fields', () => { const result = buildUpstreamBody({ model: 'google/text-embedding-004', input: ['text1', 'text2'], @@ -18,12 +18,12 @@ describe('buildUpstreamBody', () => { expect(result).toEqual({ model: 'google/text-embedding-004', input: ['text1', 'text2'], - encoding_format: 'float', safety_identifier: 'hash-abc', provider: { order: ['Google'] }, input_type: 'search_document', }); expect(result).not.toHaveProperty('dimensions'); + expect(result).not.toHaveProperty('encoding_format'); expect(result).not.toHaveProperty('output_dtype'); expect(result).not.toHaveProperty('output_dimension'); }); diff --git a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts index 770b6bb7a..5f73a3f8a 100644 --- a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts +++ b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts @@ -5,6 +5,7 @@ export type EmbeddingProxyRequest = { dimensions?: number; safety_identifier?: string; provider?: Record; + providerOptions?: Record; input_type?: string; // Mistral-specific output_dtype?: string; @@ -21,9 +22,10 @@ export function buildUpstreamBody( ): Record { const { dimensions: _, - output_dtype: __, - output_dimension: ___, - user: ____, + encoding_format: __, + output_dtype: ___, + output_dimension: ____, + user: _____, ...upstreamBody } = body; return upstreamBody; From 1459fbfd8faa0e060a7f3791321bfbed9b644318 Mon Sep 17 00:00:00 2001 From: Marius Wichtner Date: Tue, 12 May 2026 12:02:16 +0200 Subject: [PATCH 2/5] fix(gateway): adapt embedding base64 responses --- .../app/api/openrouter/embeddings/route.ts | 7 ++- .../embeddings/embedding-request.test.ts | 47 ++++++++++++++++++- .../embeddings/embedding-request.ts | 43 +++++++++++++++++ 3 files changed, 95 insertions(+), 2 deletions(-) diff --git a/apps/web/src/app/api/openrouter/embeddings/route.ts b/apps/web/src/app/api/openrouter/embeddings/route.ts index 5b82007bd..e1b52d5d5 100644 --- a/apps/web/src/app/api/openrouter/embeddings/route.ts +++ b/apps/web/src/app/api/openrouter/embeddings/route.ts @@ -33,6 +33,7 @@ import { import { emitApiMetricsForResponse } from '@/lib/ai-gateway/o11y/api-metrics.server'; import { normalizeModelId } from '@/lib/ai-gateway/model-utils'; import { + buildDownstreamResponse, buildUpstreamBody, type EmbeddingProxyRequest, } from '@/lib/ai-gateway/embeddings/embedding-request'; @@ -251,11 +252,15 @@ export async function POST(request: NextRequest): Promise { it('should forward supported fields and strip client-only, SDK-only, Mistral-specific, and deprecated fields', () => { @@ -61,3 +61,48 @@ describe('buildUpstreamBody', () => { expect(result).not.toHaveProperty('output_dimension'); }); }); + +describe('buildDownstreamResponse', () => { + it('converts numeric embeddings to base64 when the client requested base64', async () => { + const response = new Response( + JSON.stringify({ + data: [ + { object: 'embedding', embedding: [1, 2, 3], index: 0 }, + { object: 'embedding', embedding: [4, 5, 6], index: 1 }, + ], + usage: { prompt_tokens: 1, total_tokens: 1 }, + }), + { status: 200, headers: { 'content-type': 'application/json' } } + ); + + const result = await buildDownstreamResponse(response, 'base64'); + const body = await result.json(); + + expect(body.data[0].embedding).toBe( + Buffer.from(new Float32Array([1, 2, 3]).buffer).toString('base64') + ); + expect(body.data[1].embedding).toBe( + Buffer.from(new Float32Array([4, 5, 6]).buffer).toString('base64') + ); + expect(body.usage).toEqual({ prompt_tokens: 1, total_tokens: 1 }); + }); + + it('leaves responses unchanged when base64 was not requested', async () => { + const response = new Response(JSON.stringify({ data: [{ embedding: [1, 2, 3] }] }), { + status: 200, + headers: { 'content-type': 'application/json' }, + }); + + const result = await buildDownstreamResponse(response, undefined); + + expect(result).toBe(response); + }); + + it('leaves error responses unchanged', async () => { + const response = new Response('Bad Request', { status: 400 }); + + const result = await buildDownstreamResponse(response, 'base64'); + + expect(result).toBe(response); + }); +}); diff --git a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts index 5f73a3f8a..4c0d6993d 100644 --- a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts +++ b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts @@ -1,3 +1,5 @@ +import { Buffer } from 'node:buffer'; + export type EmbeddingProxyRequest = { model: string; input: unknown; @@ -30,3 +32,44 @@ export function buildUpstreamBody( } = body; return upstreamBody; } + +function floatEmbeddingToBase64(embedding: number[]): string { + return Buffer.from(new Float32Array(embedding).buffer).toString('base64'); +} + +export async function buildDownstreamResponse( + response: Response, + encodingFormat: EmbeddingProxyRequest['encoding_format'] +): Promise { + if (!response.ok || encodingFormat !== 'base64') return response; + + const contentType = response.headers.get('content-type') ?? ''; + if (!contentType.includes('application/json')) return response; + + const body = await response.clone().json(); + if (!body || typeof body !== 'object' || !Array.isArray(body.data)) return response; + + const data = body.data.map((item: unknown) => { + if ( + !item || + typeof item !== 'object' || + !Array.isArray((item as { embedding?: unknown }).embedding) + ) { + return item; + } + return { + ...item, + embedding: floatEmbeddingToBase64((item as { embedding: number[] }).embedding), + }; + }); + + const headers = new Headers(response.headers); + headers.set('content-type', 'application/json'); + headers.delete('content-length'); + + return new Response(JSON.stringify({ ...body, data }), { + status: response.status, + statusText: response.statusText, + headers, + }); +} From 5bd9c15e601d2e99d4801cf2f9bc1ba3ac17206d Mon Sep 17 00:00:00 2001 From: Marius Wichtner Date: Wed, 13 May 2026 10:03:43 +0200 Subject: [PATCH 3/5] Revert "fix(gateway): adapt embedding base64 responses" This reverts commit 1459fbfd8faa0e060a7f3791321bfbed9b644318. --- .../app/api/openrouter/embeddings/route.ts | 7 +-- .../embeddings/embedding-request.test.ts | 47 +------------------ .../embeddings/embedding-request.ts | 43 ----------------- 3 files changed, 2 insertions(+), 95 deletions(-) diff --git a/apps/web/src/app/api/openrouter/embeddings/route.ts b/apps/web/src/app/api/openrouter/embeddings/route.ts index e1b52d5d5..5b82007bd 100644 --- a/apps/web/src/app/api/openrouter/embeddings/route.ts +++ b/apps/web/src/app/api/openrouter/embeddings/route.ts @@ -33,7 +33,6 @@ import { import { emitApiMetricsForResponse } from '@/lib/ai-gateway/o11y/api-metrics.server'; import { normalizeModelId } from '@/lib/ai-gateway/model-utils'; import { - buildDownstreamResponse, buildUpstreamBody, type EmbeddingProxyRequest, } from '@/lib/ai-gateway/embeddings/embedding-request'; @@ -252,15 +251,11 @@ export async function POST(request: NextRequest): Promise { it('should forward supported fields and strip client-only, SDK-only, Mistral-specific, and deprecated fields', () => { @@ -61,48 +61,3 @@ describe('buildUpstreamBody', () => { expect(result).not.toHaveProperty('output_dimension'); }); }); - -describe('buildDownstreamResponse', () => { - it('converts numeric embeddings to base64 when the client requested base64', async () => { - const response = new Response( - JSON.stringify({ - data: [ - { object: 'embedding', embedding: [1, 2, 3], index: 0 }, - { object: 'embedding', embedding: [4, 5, 6], index: 1 }, - ], - usage: { prompt_tokens: 1, total_tokens: 1 }, - }), - { status: 200, headers: { 'content-type': 'application/json' } } - ); - - const result = await buildDownstreamResponse(response, 'base64'); - const body = await result.json(); - - expect(body.data[0].embedding).toBe( - Buffer.from(new Float32Array([1, 2, 3]).buffer).toString('base64') - ); - expect(body.data[1].embedding).toBe( - Buffer.from(new Float32Array([4, 5, 6]).buffer).toString('base64') - ); - expect(body.usage).toEqual({ prompt_tokens: 1, total_tokens: 1 }); - }); - - it('leaves responses unchanged when base64 was not requested', async () => { - const response = new Response(JSON.stringify({ data: [{ embedding: [1, 2, 3] }] }), { - status: 200, - headers: { 'content-type': 'application/json' }, - }); - - const result = await buildDownstreamResponse(response, undefined); - - expect(result).toBe(response); - }); - - it('leaves error responses unchanged', async () => { - const response = new Response('Bad Request', { status: 400 }); - - const result = await buildDownstreamResponse(response, 'base64'); - - expect(result).toBe(response); - }); -}); diff --git a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts index 4c0d6993d..5f73a3f8a 100644 --- a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts +++ b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts @@ -1,5 +1,3 @@ -import { Buffer } from 'node:buffer'; - export type EmbeddingProxyRequest = { model: string; input: unknown; @@ -32,44 +30,3 @@ export function buildUpstreamBody( } = body; return upstreamBody; } - -function floatEmbeddingToBase64(embedding: number[]): string { - return Buffer.from(new Float32Array(embedding).buffer).toString('base64'); -} - -export async function buildDownstreamResponse( - response: Response, - encodingFormat: EmbeddingProxyRequest['encoding_format'] -): Promise { - if (!response.ok || encodingFormat !== 'base64') return response; - - const contentType = response.headers.get('content-type') ?? ''; - if (!contentType.includes('application/json')) return response; - - const body = await response.clone().json(); - if (!body || typeof body !== 'object' || !Array.isArray(body.data)) return response; - - const data = body.data.map((item: unknown) => { - if ( - !item || - typeof item !== 'object' || - !Array.isArray((item as { embedding?: unknown }).embedding) - ) { - return item; - } - return { - ...item, - embedding: floatEmbeddingToBase64((item as { embedding: number[] }).embedding), - }; - }); - - const headers = new Headers(response.headers); - headers.set('content-type', 'application/json'); - headers.delete('content-length'); - - return new Response(JSON.stringify({ ...body, data }), { - status: response.status, - statusText: response.statusText, - headers, - }); -} From 640bcaaafd6619ba061408caed390482a0770a94 Mon Sep 17 00:00:00 2001 From: Marius Wichtner Date: Wed, 13 May 2026 10:03:43 +0200 Subject: [PATCH 4/5] Revert "fix(gateway): strip embedding encoding format upstream" This reverts commit ff5dde422ebfe08a4a36fef250553dccda41fdfb. --- .../lib/ai-gateway/embeddings/embedding-request.test.ts | 4 ++-- .../src/lib/ai-gateway/embeddings/embedding-request.ts | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.test.ts b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.test.ts index e8ec4561c..08d3891a8 100644 --- a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.test.ts +++ b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.test.ts @@ -2,7 +2,7 @@ import { describe, it, expect } from '@jest/globals'; import { buildUpstreamBody } from './embedding-request'; describe('buildUpstreamBody', () => { - it('should forward supported fields and strip client-only, SDK-only, Mistral-specific, and deprecated fields', () => { + it('should forward supported fields and strip client-only, Mistral-specific, and deprecated fields', () => { const result = buildUpstreamBody({ model: 'google/text-embedding-004', input: ['text1', 'text2'], @@ -18,12 +18,12 @@ describe('buildUpstreamBody', () => { expect(result).toEqual({ model: 'google/text-embedding-004', input: ['text1', 'text2'], + encoding_format: 'float', safety_identifier: 'hash-abc', provider: { order: ['Google'] }, input_type: 'search_document', }); expect(result).not.toHaveProperty('dimensions'); - expect(result).not.toHaveProperty('encoding_format'); expect(result).not.toHaveProperty('output_dtype'); expect(result).not.toHaveProperty('output_dimension'); }); diff --git a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts index 5f73a3f8a..770b6bb7a 100644 --- a/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts +++ b/apps/web/src/lib/ai-gateway/embeddings/embedding-request.ts @@ -5,7 +5,6 @@ export type EmbeddingProxyRequest = { dimensions?: number; safety_identifier?: string; provider?: Record; - providerOptions?: Record; input_type?: string; // Mistral-specific output_dtype?: string; @@ -22,10 +21,9 @@ export function buildUpstreamBody( ): Record { const { dimensions: _, - encoding_format: __, - output_dtype: ___, - output_dimension: ____, - user: _____, + output_dtype: __, + output_dimension: ___, + user: ____, ...upstreamBody } = body; return upstreamBody; From eee8dc3ed2f6682cfbd898c637f8d556fd729e0c Mon Sep 17 00:00:00 2001 From: Marius Wichtner Date: Wed, 13 May 2026 10:05:24 +0200 Subject: [PATCH 5/5] fix(gateway): correct Codestral embedding dimension --- apps/web/src/app/api/gateway/embedding-models/route.test.ts | 2 +- apps/web/src/lib/ai-gateway/embeddings/kilo-embedding-models.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/web/src/app/api/gateway/embedding-models/route.test.ts b/apps/web/src/app/api/gateway/embedding-models/route.test.ts index cf376d467..fb21cd81a 100644 --- a/apps/web/src/app/api/gateway/embedding-models/route.test.ts +++ b/apps/web/src/app/api/gateway/embedding-models/route.test.ts @@ -24,7 +24,7 @@ describe('GET /api/gateway/embedding-models', () => { }); expect(getKiloEmbeddingModel('codestral-embed-2505')).toMatchObject({ id: 'mistralai/codestral-embed-2505', - dimension: 256, + dimension: 1536, scoreThreshold: 0.35, }); expect(normalizeKiloEmbeddingModelId('text-embedding-3-small')).toBe( diff --git a/apps/web/src/lib/ai-gateway/embeddings/kilo-embedding-models.ts b/apps/web/src/lib/ai-gateway/embeddings/kilo-embedding-models.ts index e21e33e6e..71f7e1aef 100644 --- a/apps/web/src/lib/ai-gateway/embeddings/kilo-embedding-models.ts +++ b/apps/web/src/lib/ai-gateway/embeddings/kilo-embedding-models.ts @@ -18,7 +18,7 @@ export const KILO_EMBEDDING_MODELS = [ { id: 'mistralai/codestral-embed-2505', name: 'Codestral Embed 2505', - dimension: 256, + dimension: 1536, scoreThreshold: 0.35, note: 'code', },