Skip to content

Commit 154394f

Browse files
committed
hot fix
1 parent 1d86b97 commit 154394f

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

eval_protocol/proxy/proxy_core/litellm.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import json
66
import base64
7+
import asyncio
78
import httpx
89
import logging
910
from uuid6 import uuid7
@@ -14,6 +15,12 @@
1415

1516
logger = logging.getLogger(__name__)
1617

18+
# Retry configuration for 404 errors
19+
# 8 retries with exponential backoff (1, 2, 4, 8, 16, 32, 64, 128 seconds)
20+
# Total backoff wait: 255 seconds (~4.25 minutes), not counting the time spent on the requests themselves
21+
MAX_RETRIES_ON_404 = 8
22+
RETRY_BASE_DELAY_SECONDS = 1
23+
1724

1825
async def handle_chat_completion(
1926
config: ProxyConfig,
@@ -108,12 +115,29 @@ async def handle_chat_completion(
108115
# Forward to LiteLLM
109116
litellm_url = f"{config.litellm_url}/chat/completions"
110117

118+
# Retry with exponential backoff while LiteLLM returns 404 (the retry loop follows the initial request below)
119+
# Initial request
111120
response = await client.post(
112121
litellm_url,
113122
json=data, # httpx will serialize and set correct Content-Length
114123
headers=headers,
115124
)
116125

126+
for attempt in range(MAX_RETRIES_ON_404):
127+
if response.status_code != 404:
128+
break
129+
130+
# Wait with exponential backoff before retry
131+
delay = RETRY_BASE_DELAY_SECONDS * (2**attempt)
132+
logger.warning(f"Got 404 from LiteLLM, retrying in {delay}s (attempt {attempt + 1}/{MAX_RETRIES_ON_404})")
133+
await asyncio.sleep(delay)
134+
135+
response = await client.post(
136+
litellm_url,
137+
json=data,
138+
headers=headers,
139+
)
140+
117141
# Register insertion_id in Redis only on successful response
118142
if response.status_code == 200 and insertion_id is not None and rollout_id is not None:
119143
register_insertion_id(redis_client, rollout_id, insertion_id)

0 commit comments

Comments
 (0)