Skip to content

Commit b6cc513

Browse files
committed
fireworks: fall back to standard API on any 5xx error
1 parent aa2b977 commit b6cc513

File tree

2 files changed

+86
-34
lines changed

2 files changed

+86
-34
lines changed

web/src/llm-api/__tests__/fireworks-deployment.test.ts

Lines changed: 80 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -202,33 +202,88 @@ describe('Fireworks deployment routing', () => {
202202
}
203203
})
204204

205-
it('throws FireworksError on non-scaling 503 from deployment', async () => {
205+
it('falls back to standard API on non-scaling 503 from deployment', async () => {
206206
const spy = spyDeploymentHours(true)
207+
const fetchCalls: string[] = []
208+
let callCount = 0
207209

208-
const mockFetch = mock(async () => {
209-
return new Response(
210-
JSON.stringify({
211-
error: {
212-
message: 'Service temporarily unavailable',
213-
code: 'SERVICE_UNAVAILABLE',
214-
type: 'error',
215-
},
216-
}),
217-
{ status: 503, statusText: 'Service Unavailable' },
218-
)
210+
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
211+
const body = JSON.parse(init?.body as string)
212+
fetchCalls.push(body.model)
213+
callCount++
214+
215+
if (callCount === 1) {
216+
return new Response(
217+
JSON.stringify({
218+
error: {
219+
message: 'Service temporarily unavailable',
220+
code: 'SERVICE_UNAVAILABLE',
221+
type: 'error',
222+
},
223+
}),
224+
{ status: 503, statusText: 'Service Unavailable' },
225+
)
226+
}
227+
228+
return new Response(JSON.stringify({ ok: true }), { status: 200 })
219229
}) as unknown as typeof globalThis.fetch
220230

221231
try {
222-
await expect(
223-
createFireworksRequestWithFallback({
224-
body: minimalBody as never,
225-
originalModel: 'minimax/minimax-m2.5',
226-
fetch: mockFetch,
227-
logger,
228-
useCustomDeployment: true,
229-
sessionId: 'test-user-id',
230-
}),
231-
).rejects.toBeInstanceOf(FireworksError)
232+
const response = await createFireworksRequestWithFallback({
233+
body: minimalBody as never,
234+
originalModel: 'minimax/minimax-m2.5',
235+
fetch: mockFetch,
236+
logger,
237+
useCustomDeployment: true,
238+
sessionId: 'test-user-id',
239+
})
240+
241+
expect(response.status).toBe(200)
242+
expect(fetchCalls).toHaveLength(2)
243+
expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
244+
expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
245+
// Non-scaling 503 should NOT activate the cooldown
246+
expect(isDeploymentCoolingDown()).toBe(false)
247+
} finally {
248+
spy.restore()
249+
}
250+
})
251+
252+
it('falls back to standard API on 500 Internal Error from deployment', async () => {
253+
const spy = spyDeploymentHours(true)
254+
const fetchCalls: string[] = []
255+
let callCount = 0
256+
257+
const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
258+
const body = JSON.parse(init?.body as string)
259+
fetchCalls.push(body.model)
260+
callCount++
261+
262+
if (callCount === 1) {
263+
return new Response(
264+
JSON.stringify({ error: 'Internal error' }),
265+
{ status: 500, statusText: 'Internal Server Error' },
266+
)
267+
}
268+
269+
return new Response(JSON.stringify({ ok: true }), { status: 200 })
270+
}) as unknown as typeof globalThis.fetch
271+
272+
try {
273+
const response = await createFireworksRequestWithFallback({
274+
body: minimalBody as never,
275+
originalModel: 'minimax/minimax-m2.5',
276+
fetch: mockFetch,
277+
logger,
278+
useCustomDeployment: true,
279+
sessionId: 'test-user-id',
280+
})
281+
282+
expect(response.status).toBe(200)
283+
expect(fetchCalls).toHaveLength(2)
284+
expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
285+
expect(fetchCalls[1]).toBe(STANDARD_MODEL_ID)
286+
expect(isDeploymentCoolingDown()).toBe(false)
232287
} finally {
233288
spy.restore()
234289
}
@@ -292,7 +347,7 @@ describe('Fireworks deployment routing', () => {
292347
}
293348
})
294349

295-
it('returns non-200 responses from deployment without fallback (non-503)', async () => {
350+
it('returns non-5xx responses from deployment without fallback (e.g. 429)', async () => {
296351
const spy = spyDeploymentHours(true)
297352
const fetchCalls: string[] = []
298353

@@ -315,7 +370,7 @@ describe('Fireworks deployment routing', () => {
315370
sessionId: 'test-user-id',
316371
})
317372

318-
// Non-503 errors from deployment are returned as-is (caller handles them)
373+
// Non-5xx errors from deployment are returned as-is (caller handles them)
319374
expect(response.status).toBe(429)
320375
expect(fetchCalls).toHaveLength(1)
321376
expect(fetchCalls[0]).toBe(DEPLOYMENT_MODEL_ID)
@@ -324,7 +379,7 @@ describe('Fireworks deployment routing', () => {
324379
}
325380
})
326381

327-
it('logs when trying deployment and when falling back', async () => {
382+
it('logs when trying deployment and when falling back on 5xx', async () => {
328383
const spy = spyDeploymentHours(true)
329384
let callCount = 0
330385

web/src/llm-api/fireworks.ts

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -707,19 +707,16 @@ export async function createFireworksRequestWithFallback(params: {
707707
sessionId,
708708
})
709709

710-
if (response.status === 503) {
710+
if (response.status >= 500) {
711711
const errorText = await response.text()
712+
logger.info(
713+
{ model: originalModel, status: response.status, errorText: errorText.slice(0, 200) },
714+
'Fireworks custom deployment returned 5xx, falling back to standard API',
715+
)
712716
if (errorText.includes('DEPLOYMENT_SCALING_UP')) {
713-
logger.info(
714-
{ model: originalModel },
715-
'Fireworks deployment scaling up, falling back to standard API',
716-
)
717717
markDeploymentScalingUp()
718-
// Fall through to standard API request below
719-
} else {
720-
// Non-scaling 503 — treat as a real error
721-
throw parseFireworksErrorFromText(response.status, response.statusText, errorText)
722718
}
719+
// Fall through to standard API request below
723720
} else {
724721
return response
725722
}

0 commit comments

Comments
 (0)