88from eval_protocol .models import EvaluationRow , Status
99from eval_protocol .data_loader .dynamic_data_loader import DynamicDataLoader
1010from eval_protocol .types .remote_rollout_processor import ElasticsearchConfig , InitRequest , RolloutMetadata
11+ from eval_protocol .adapters .fireworks_tracing import create_fireworks_tracing_adapter
12+ from eval_protocol .quickstart .utils import filter_longest_conversation
1113from .rollout_processor import RolloutProcessor
1214from .types import RolloutProcessorConfig
1315from .elasticsearch_setup import ElasticsearchSetup
1820logger = logging .getLogger (__name__ )
1921
2022
23+ def _default_output_data_loader (rollout_id : str , base_url : str ) -> DynamicDataLoader :
24+ """Default output data loader that fetches traces from Fireworks tracing proxy.
25+
26+ Args:
27+ rollout_id: The rollout ID to filter traces by
28+
29+ Returns:
30+ DynamicDataLoader configured to fetch and process traces
31+ """
32+
33+ def fetch_traces () -> List [EvaluationRow ]:
34+ adapter = create_fireworks_tracing_adapter (base_url = base_url )
35+ return adapter .get_evaluation_rows (tags = [f"rollout_id:{ rollout_id } " ], max_retries = 5 )
36+
37+ return DynamicDataLoader (generators = [fetch_traces ], preprocess_fn = filter_longest_conversation )
38+
39+
2140class RemoteRolloutProcessor (RolloutProcessor ):
2241 """
2342 Rollout processor that triggers a remote HTTP server to perform the rollout.
2443
44+ By default, fetches traces from the Fireworks tracing proxy using rollout_id tags.
45+ You can provide a custom output_data_loader for different tracing backends.
46+
2547 See https://evalprotocol.io/tutorial/remote-rollout-processor for documentation.
2648 """
2749
@@ -32,7 +54,7 @@ def __init__(
3254 model_base_url : str = "https://tracing.fireworks.ai" ,
3355 poll_interval : float = 1.0 ,
3456 timeout_seconds : float = 120.0 ,
35- output_data_loader : Callable [[str ], DynamicDataLoader ],
57+ output_data_loader : Optional [ Callable [[str , str ], DynamicDataLoader ]] = None ,
3658 disable_elastic_search : bool = False ,
3759 elastic_search_config : Optional [ElasticsearchConfig ] = None ,
3860 ):
@@ -44,7 +66,7 @@ def __init__(
4466 self ._remote_base_url = os .getenv ("EP_REMOTE_ROLLOUT_PROCESSOR_BASE_URL" )
4567 self ._poll_interval = poll_interval
4668 self ._timeout_seconds = timeout_seconds
47- self ._output_data_loader = output_data_loader
69+ self ._output_data_loader = output_data_loader or _default_output_data_loader
4870 self ._disable_elastic_search = disable_elastic_search
4971 self ._elastic_search_config = elastic_search_config
5072
@@ -242,7 +264,7 @@ def _get_status() -> Dict[str, Any]:
242264 if row .execution_metadata .rollout_id is None :
243265 raise ValueError ("Rollout ID is required in RemoteRolloutProcessor" )
244266
245- data_loader = self ._output_data_loader (row .execution_metadata .rollout_id )
267+ data_loader = self ._output_data_loader (row .execution_metadata .rollout_id , model_base_url )
246268
247269 def _load_data ():
248270 return data_loader .load ()
0 commit comments