@@ -35,6 +35,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
35 35      request_params = {"messages": messages_payload, **config.completion_params}
36 36      # Ensure caching is disabled only for this request (review feedback)
37 37      request_params["cache"] = {"no-cache": True}
   38 +    # request_params["timeout"] = 1200  # 20 minutes timeout
38 39      request_params["stream"] = True  # Enable streaming
39 40      # Single-level reasoning effort: expect `reasoning_effort` only
40 41      effort_val = None
@@ -69,23 +70,24 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
69 70      _litellm = importlib.import_module("litellm")
70 71      acompletion = getattr(_litellm, "acompletion")
71 72
72    -    # Handle streaming response
   73 +    # Handle streaming response - following LiteLLM docs pattern
73 74      assistant_content = ""
74 75      tool_calls = None
75    -    usage_info = None
   76 +    chunks = []
   77 +
   78 +    response = await acompletion(**request_params)
   79 +
   80 +    # Process streaming chunks
   81 +    async for chunk in response:
   82 +        chunks.append(chunk)  # Collect chunks for potential use with stream_chunk_builder
76 83
77    -    async for chunk in await acompletion(**request_params):
78 84          if chunk.choices and len(chunk.choices) > 0:
79 85              delta = chunk.choices[0].delta
80 86              if hasattr(delta, "content") and delta.content:
81 87                  assistant_content += delta.content
82 88              if hasattr(delta, "tool_calls") and delta.tool_calls:
83 89                  tool_calls = delta.tool_calls
84 90
85    -        # Capture usage info from the final chunk
86    -        if hasattr(chunk, "usage") and chunk.usage:
87    -            usage_info = chunk.usage
88    -
89 91      converted_tool_calls = None
90 92      if tool_calls:
91 93          converted_tool_calls = []
@@ -125,18 +127,26 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
125 127        )
126 128    ]
127 129
    130 +  # Try to get usage info from chunks, fallback to estimates
    131 +  usage_info = None
    132 +  for chunk in reversed(chunks):  # Check last chunks first for usage info
    133 +      if hasattr(chunk, "usage") and chunk.usage:
    134 +          usage_info = chunk.usage
    135 +          break
    136 +
128 137    if usage_info:
129 138        row.execution_metadata.usage = CompletionUsage(
130 139            prompt_tokens=usage_info.prompt_tokens,
131 140            completion_tokens=usage_info.completion_tokens,
132 141            total_tokens=usage_info.total_tokens,
133 142        )
134 143    else:
135     -      # Fallback if usage info not available from streaming
    144 +      # Fallback estimates when streaming doesn't provide usage
    145 +      estimated_completion_tokens = len(assistant_content.split()) if assistant_content else 0
136 146        row.execution_metadata.usage = CompletionUsage(
137 147            prompt_tokens=0,
138     -          completion_tokens=0,
139     -          total_tokens=0,
    148 +          completion_tokens=estimated_completion_tokens,
    149 +          total_tokens=estimated_completion_tokens,
140 150        )
141151
142152 row .messages = messages
0 commit comments