Skip to content

Commit e71c5d8

Browse files
committed
try again
1 parent a3e7941 commit e71c5d8

1 file changed

Lines changed: 20 additions & 10 deletions

File tree

eval_protocol/pytest/default_single_turn_rollout_process.py

Lines changed: 20 additions & 10 deletions
Original file line number · Diff line number · Diff line change
@@ -35,6 +35,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
3535
request_params = {"messages": messages_payload, **config.completion_params}
3636
# Ensure caching is disabled only for this request (review feedback)
3737
request_params["cache"] = {"no-cache": True}
38+
# request_params["timeout"] = 1200 # 20 minutes timeout
3839
request_params["stream"] = True # Enable streaming
3940
# Single-level reasoning effort: expect `reasoning_effort` only
4041
effort_val = None
@@ -69,23 +70,24 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
6970
_litellm = importlib.import_module("litellm")
7071
acompletion = getattr(_litellm, "acompletion")
7172

72-
# Handle streaming response
73+
# Handle streaming response - following LiteLLM docs pattern
7374
assistant_content = ""
7475
tool_calls = None
75-
usage_info = None
76+
chunks = []
77+
78+
response = await acompletion(**request_params)
79+
80+
# Process streaming chunks
81+
async for chunk in response:
82+
chunks.append(chunk) # Collect chunks for potential use with stream_chunk_builder
7683

77-
async for chunk in await acompletion(**request_params):
7884
if chunk.choices and len(chunk.choices) > 0:
7985
delta = chunk.choices[0].delta
8086
if hasattr(delta, "content") and delta.content:
8187
assistant_content += delta.content
8288
if hasattr(delta, "tool_calls") and delta.tool_calls:
8389
tool_calls = delta.tool_calls
8490

85-
# Capture usage info from the final chunk
86-
if hasattr(chunk, "usage") and chunk.usage:
87-
usage_info = chunk.usage
88-
8991
converted_tool_calls = None
9092
if tool_calls:
9193
converted_tool_calls = []
@@ -125,18 +127,26 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
125127
)
126128
]
127129

130+
# Try to get usage info from chunks, fallback to estimates
131+
usage_info = None
132+
for chunk in reversed(chunks): # Check last chunks first for usage info
133+
if hasattr(chunk, "usage") and chunk.usage:
134+
usage_info = chunk.usage
135+
break
136+
128137
if usage_info:
129138
row.execution_metadata.usage = CompletionUsage(
130139
prompt_tokens=usage_info.prompt_tokens,
131140
completion_tokens=usage_info.completion_tokens,
132141
total_tokens=usage_info.total_tokens,
133142
)
134143
else:
135-
# Fallback if usage info not available from streaming
144+
# Fallback estimates when streaming doesn't provide usage
145+
estimated_completion_tokens = len(assistant_content.split()) if assistant_content else 0
136146
row.execution_metadata.usage = CompletionUsage(
137147
prompt_tokens=0,
138-
completion_tokens=0,
139-
total_tokens=0,
148+
completion_tokens=estimated_completion_tokens,
149+
total_tokens=estimated_completion_tokens,
140150
)
141151

142152
row.messages = messages

0 commit comments

Comments (0)