Skip to content

Commit 95cf759

Browse files
committed
Revert "add finish reason (#421)"
This reverts commit 1d07878.
1 parent c1d429a commit 95cf759

5 files changed

Lines changed: 5 additions & 96 deletions

File tree

eval_protocol/adapters/fireworks_tracing.py

Lines changed: 3 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -8,10 +8,8 @@
88
import logging
99
import requests
1010
from datetime import datetime
11-
import ast
12-
import json
13-
import os
1411
from typing import Any, Dict, List, Optional, Protocol
12+
import os
1513

1614
from eval_protocol.models import EvaluationRow, InputMetadata, ExecutionMetadata, Message
1715
from .base import BaseAdapter
@@ -46,43 +44,6 @@ def __call__(
4644
...
4745

4846

49-
def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
50-
"""Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.
51-
52-
Args:
53-
observations: List of observation dictionaries from the trace
54-
55-
Returns:
56-
Dict with all attributes parsed. Or None if not found.
57-
"""
58-
for obs in observations:
59-
if obs.get("name") == "raw_gen_ai_request" and obs.get("type") == "SPAN":
60-
metadata = obs.get("metadata") or {}
61-
attributes = metadata.get("attributes") or {}
62-
63-
result: Dict[str, Any] = {}
64-
65-
for key, value in attributes.items():
66-
# Try to parse stringified objects (could be Python repr or JSON)
67-
if isinstance(value, str) and value.startswith(("[", "{")):
68-
try:
69-
result[key] = ast.literal_eval(value)
70-
except Exception as e:
71-
logger.debug("Failed to parse %s with ast.literal_eval: %s", key, e)
72-
try:
73-
result[key] = json.loads(value)
74-
except Exception as e:
75-
logger.debug("Failed to parse %s with json.loads: %s", key, e)
76-
result[key] = value
77-
else:
78-
result[key] = value
79-
80-
if result:
81-
return result
82-
83-
return None
84-
85-
8647
def convert_trace_dict_to_evaluation_row(
8748
trace: Dict[str, Any], include_tool_calls: bool = True, span_name: Optional[str] = None
8849
) -> Optional[EvaluationRow]:
@@ -135,14 +96,6 @@ def convert_trace_dict_to_evaluation_row(
13596
):
13697
break # Break early if we've found all the metadata we need
13798

138-
observations = trace.get("observations") or []
139-
# We can only extract when stored in OTEL format.
140-
openai_response = extract_openai_response(observations)
141-
if openai_response:
142-
choices = openai_response.get("llm.openai.choices")
143-
if choices and len(choices) > 0:
144-
execution_metadata.finish_reason = choices[0].get("finish_reason")
145-
14699
return EvaluationRow(
147100
messages=messages,
148101
tools=tools,
@@ -207,7 +160,7 @@ def extract_messages_from_trace_dict(
207160
# Fallback: use the last GENERATION observation which typically contains full chat history
208161
if not messages:
209162
try:
210-
all_observations = trace.get("observations") or []
163+
all_observations = trace.get("observations", [])
211164
gens = [obs for obs in all_observations if obs.get("type") == "GENERATION"]
212165
if gens:
213166
gens.sort(key=lambda x: x.get("start_time", ""))
@@ -233,7 +186,7 @@ def get_final_generation_in_span_dict(trace: Dict[str, Any], span_name: str) ->
233186
The final generation dictionary, or None if not found
234187
"""
235188
# Get all observations from the trace
236-
all_observations = trace.get("observations") or []
189+
all_observations = trace.get("observations", [])
237190

238191
# Find a span with the given name that has generation children
239192
parent_span = None

eval_protocol/proxy/proxy_core/langfuse.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,6 @@ def _serialize_trace_to_dict(trace_full: Any) -> Dict[str, Any]:
5050
"input": getattr(obs, "input", None),
5151
"output": getattr(obs, "output", None),
5252
"parent_observation_id": getattr(obs, "parent_observation_id", None),
53-
"metadata": getattr(obs, "metadata", None),
5453
}
5554
for obs in getattr(trace_full, "observations", [])
5655
]

eval_protocol/reward_function.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from .models import EvaluateResult, MetricResult
1313
from .typed_interface import reward_function
1414

15+
logging.basicConfig(level=logging.INFO)
1516
logger = logging.getLogger(__name__)
1617

1718
T = TypeVar("T", bound=Callable[..., EvaluateResult])

tests/remote_server/remote_server.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,6 @@
1313

1414
app = FastAPI()
1515

16-
# Configure logging for the remote server (required for INFO-level logs to be emitted)
17-
logging.basicConfig(level=logging.INFO, format="%(name)s - %(levelname)s - %(message)s")
18-
1916
# Attach Fireworks tracing handler to root logger
2017
fireworks_handler = FireworksTracingHttpHandler()
2118
logging.getLogger().addHandler(fireworks_handler)

tests/remote_server/test_remote_fireworks.py

Lines changed: 1 addition & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
# AUTO SERVER STARTUP: Server is automatically started and stopped by the test
22

3-
import logging
43
import subprocess
54
import socket
65
import time
@@ -20,23 +19,10 @@
2019
ROLLOUT_IDS = set()
2120

2221

23-
class StatusLogCaptureHandler(logging.Handler):
24-
"""Custom handler to capture status log messages."""
25-
26-
def __init__(self):
27-
super().__init__()
28-
self.status_100_messages: List[str] = []
29-
30-
def emit(self, record):
31-
msg = record.getMessage() # Use getMessage(), not .message attribute
32-
if "Found Fireworks log" in msg and "with status code 100" in msg:
33-
self.status_100_messages.append(msg)
34-
35-
3622
@pytest.fixture(autouse=True)
3723
def check_rollout_coverage(monkeypatch):
3824
"""
39-
Ensure we attempted to fetch remote traces for each rollout and received status logs.
25+
Ensure we attempted to fetch remote traces for each rollout.
4026
4127
This wraps the built-in default_fireworks_output_data_loader (without making it configurable)
4228
and tracks rollout_ids passed through its DataLoaderConfig.
@@ -51,32 +37,9 @@ def wrapped_loader(config: DataLoaderConfig) -> DynamicDataLoader:
5137
return original_loader(config)
5238

5339
monkeypatch.setattr(remote_rollout_processor_module, "default_fireworks_output_data_loader", wrapped_loader)
54-
55-
# Add custom handler to capture status logs
56-
status_handler = StatusLogCaptureHandler()
57-
status_handler.setLevel(logging.INFO)
58-
rrp_logger = logging.getLogger("eval_protocol.pytest.remote_rollout_processor")
59-
rrp_logger.addHandler(status_handler)
60-
# Ensure the logger level allows INFO messages through
61-
original_level = rrp_logger.level
62-
rrp_logger.setLevel(logging.INFO)
63-
6440
yield
65-
66-
# Cleanup handler and restore level
67-
rrp_logger.removeHandler(status_handler)
68-
rrp_logger.setLevel(original_level)
69-
70-
# After test completes, verify we saw status logs for all 3 rollouts
7141
assert len(ROLLOUT_IDS) == 3, f"Expected to see 3 rollout_ids, but only saw {ROLLOUT_IDS}"
7242

73-
# Check that we received "Found Fireworks log ... with status code 100" for each rollout
74-
assert len(status_handler.status_100_messages) == 3, (
75-
f"Expected 3 'Found Fireworks log ... with status code 100' messages, but found {len(status_handler.status_100_messages)}. "
76-
f"This means the status logs from the remote server were not received. "
77-
f"Messages captured: {status_handler.status_100_messages}"
78-
)
79-
8043

8144
def find_available_port() -> int:
8245
"""Find an available port on localhost"""
@@ -178,8 +141,4 @@ async def test_remote_rollout_and_fetch_fireworks(row: EvaluationRow) -> Evaluat
178141
assert "data_loader_type" in row.input_metadata.dataset_info
179142
assert "data_loader_num_rows" in row.input_metadata.dataset_info
180143

181-
assert row.execution_metadata.finish_reason == "stop", (
182-
f"Expected finish_reason='stop', got {row.execution_metadata.finish_reason}"
183-
)
184-
185144
return row

0 commit comments

Comments
 (0)