@@ -202,33 +202,88 @@ describe('Fireworks deployment routing', () => {
202202 }
203203 } )
204204
205- it ( 'throws FireworksError on non-scaling 503 from deployment' , async ( ) => {
205+ it ( 'falls back to standard API on non-scaling 503 from deployment' , async ( ) => {
206206 const spy = spyDeploymentHours ( true )
207+ const fetchCalls : string [ ] = [ ]
208+ let callCount = 0
207209
208- const mockFetch = mock ( async ( ) => {
209- return new Response (
210- JSON . stringify ( {
211- error : {
212- message : 'Service temporarily unavailable' ,
213- code : 'SERVICE_UNAVAILABLE' ,
214- type : 'error' ,
215- } ,
216- } ) ,
217- { status : 503 , statusText : 'Service Unavailable' } ,
218- )
210+ const mockFetch = mock ( async ( _url : string | URL | Request , init ?: RequestInit ) => {
211+ const body = JSON . parse ( init ?. body as string )
212+ fetchCalls . push ( body . model )
213+ callCount ++
214+
215+ if ( callCount === 1 ) {
216+ return new Response (
217+ JSON . stringify ( {
218+ error : {
219+ message : 'Service temporarily unavailable' ,
220+ code : 'SERVICE_UNAVAILABLE' ,
221+ type : 'error' ,
222+ } ,
223+ } ) ,
224+ { status : 503 , statusText : 'Service Unavailable' } ,
225+ )
226+ }
227+
228+ return new Response ( JSON . stringify ( { ok : true } ) , { status : 200 } )
219229 } ) as unknown as typeof globalThis . fetch
220230
221231 try {
222- await expect (
223- createFireworksRequestWithFallback ( {
224- body : minimalBody as never ,
225- originalModel : 'minimax/minimax-m2.5' ,
226- fetch : mockFetch ,
227- logger,
228- useCustomDeployment : true ,
229- sessionId : 'test-user-id' ,
230- } ) ,
231- ) . rejects . toBeInstanceOf ( FireworksError )
232+ const response = await createFireworksRequestWithFallback ( {
233+ body : minimalBody as never ,
234+ originalModel : 'minimax/minimax-m2.5' ,
235+ fetch : mockFetch ,
236+ logger,
237+ useCustomDeployment : true ,
238+ sessionId : 'test-user-id' ,
239+ } )
240+
241+ expect ( response . status ) . toBe ( 200 )
242+ expect ( fetchCalls ) . toHaveLength ( 2 )
243+ expect ( fetchCalls [ 0 ] ) . toBe ( DEPLOYMENT_MODEL_ID )
244+ expect ( fetchCalls [ 1 ] ) . toBe ( STANDARD_MODEL_ID )
245+ // Non-scaling 503 should NOT activate the cooldown
246+ expect ( isDeploymentCoolingDown ( ) ) . toBe ( false )
247+ } finally {
248+ spy . restore ( )
249+ }
250+ } )
251+
252+ it ( 'falls back to standard API on 500 Internal Error from deployment' , async ( ) => {
253+ const spy = spyDeploymentHours ( true )
254+ const fetchCalls : string [ ] = [ ]
255+ let callCount = 0
256+
257+ const mockFetch = mock ( async ( _url : string | URL | Request , init ?: RequestInit ) => {
258+ const body = JSON . parse ( init ?. body as string )
259+ fetchCalls . push ( body . model )
260+ callCount ++
261+
262+ if ( callCount === 1 ) {
263+ return new Response (
264+ JSON . stringify ( { error : 'Internal error' } ) ,
265+ { status : 500 , statusText : 'Internal Server Error' } ,
266+ )
267+ }
268+
269+ return new Response ( JSON . stringify ( { ok : true } ) , { status : 200 } )
270+ } ) as unknown as typeof globalThis . fetch
271+
272+ try {
273+ const response = await createFireworksRequestWithFallback ( {
274+ body : minimalBody as never ,
275+ originalModel : 'minimax/minimax-m2.5' ,
276+ fetch : mockFetch ,
277+ logger,
278+ useCustomDeployment : true ,
279+ sessionId : 'test-user-id' ,
280+ } )
281+
282+ expect ( response . status ) . toBe ( 200 )
283+ expect ( fetchCalls ) . toHaveLength ( 2 )
284+ expect ( fetchCalls [ 0 ] ) . toBe ( DEPLOYMENT_MODEL_ID )
285+ expect ( fetchCalls [ 1 ] ) . toBe ( STANDARD_MODEL_ID )
286+ expect ( isDeploymentCoolingDown ( ) ) . toBe ( false )
232287 } finally {
233288 spy . restore ( )
234289 }
@@ -292,7 +347,7 @@ describe('Fireworks deployment routing', () => {
292347 }
293348 } )
294349
295- it ( 'returns non-200 responses from deployment without fallback (non-503 )' , async ( ) => {
350+ it ( 'returns non-5xx responses from deployment without fallback (e.g. 429 )' , async ( ) => {
296351 const spy = spyDeploymentHours ( true )
297352 const fetchCalls : string [ ] = [ ]
298353
@@ -315,7 +370,7 @@ describe('Fireworks deployment routing', () => {
315370 sessionId : 'test-user-id' ,
316371 } )
317372
318- // Non-503 errors from deployment are returned as-is (caller handles them)
373+ // Non-5xx errors from deployment are returned as-is (caller handles them)
319374 expect ( response . status ) . toBe ( 429 )
320375 expect ( fetchCalls ) . toHaveLength ( 1 )
321376 expect ( fetchCalls [ 0 ] ) . toBe ( DEPLOYMENT_MODEL_ID )
@@ -324,7 +379,7 @@ describe('Fireworks deployment routing', () => {
324379 }
325380 } )
326381
327- it ( 'logs when trying deployment and when falling back' , async ( ) => {
382+ it ( 'logs when trying deployment and when falling back on 5xx ' , async ( ) => {
328383 const spy = spyDeploymentHours ( true )
329384 let callCount = 0
330385
0 commit comments