11import asyncio
22from typing import List
33
4- from litellm import acompletion
5- import litellm
6- from openai .types .chat .chat_completion_message import ChatCompletionMessageToolCall
4+ import logging
5+ import os
76
87from eval_protocol .dataset_logger import default_logger
9- from eval_protocol .models import EvaluationRow , Message
8+ from eval_protocol .models import EvaluationRow , Message , ChatCompletionMessageToolCall
109from eval_protocol .pytest .types import RolloutProcessorConfig
1110
1211
@@ -15,15 +14,20 @@ async def default_single_turn_rollout_processor(
1514) -> List [EvaluationRow ]:
1615 """Generate a single response from any supported model provider using LiteLLM."""
1716
18- # Explicitly disable LiteLLM caching to avoid reused responses across runs
17+ # Quiet LiteLLM logs in test runs unless user overrode
1918 try :
20- litellm .cache = None
21- # Some versions expose a helper; ignore if unavailable
22- if hasattr (litellm , "disable_cache" ):
23- litellm .disable_cache () # type: ignore[call-arg]
19+ if os .environ .get ("LITELLM_LOG" ) is None :
20+ os .environ ["LITELLM_LOG" ] = "ERROR"
21+ _llog = logging .getLogger ("LiteLLM" )
22+ _llog .setLevel (logging .CRITICAL )
23+ _llog .propagate = False
24+ for _h in list (_llog .handlers ):
25+ _llog .removeHandler (_h )
2426 except Exception :
2527 pass
2628
29+ # Do not modify global LiteLLM cache. Disable caching per-request instead.
30+
2731 async def process_row (row : EvaluationRow ) -> EvaluationRow :
2832 """Process a single row asynchronously."""
2933 if len (row .messages ) == 0 :
@@ -32,6 +36,8 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
3236 messages_payload = [{"role" : m .role , "content" : m .content } for m in row .messages ]
3337
3438 request_params = {"model" : config .model , "messages" : messages_payload , ** config .input_params }
39+ # Ensure caching is disabled only for this request (review feedback)
40+ request_params ["cache" ] = {"no-cache" : True }
3541 # Allow passing reasoning effort to Fireworks via LiteLLM using extra_body
3642 # Expected: config.input_params may contain {"reasoning": {"effort": "low|medium|high"}}
3743 if "reasoning" in config .input_params :
@@ -41,6 +47,10 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
4147 if row .tools is not None :
4248 request_params ["tools" ] = row .tools
4349
50+ # Dynamic import to avoid static dependency/lint errors if LiteLLM isn't installed yet
51+ import importlib
52+ _litellm = importlib .import_module ("litellm" )
53+ acompletion = getattr (_litellm , "acompletion" )
4454 response = await acompletion (** request_params )
4555
4656 assistant_content = response .choices [0 ].message .content or ""