88import logging
99import requests
1010from datetime import datetime
11- import ast
12- import json
13- import os
1411from typing import Any , Dict , List , Optional , Protocol
12+ import os
1513
1614from eval_protocol .models import EvaluationRow , InputMetadata , ExecutionMetadata , Message
1715from .base import BaseAdapter
@@ -46,43 +44,6 @@ def __call__(
4644 ...
4745
4846
def _parse_stringified_attribute(key: str, value: str) -> Any:
    """Best-effort decode of a stringified list/dict attribute value.

    Tries a Python literal first (``ast.literal_eval``), then JSON
    (``json.loads``). If neither parser accepts the text, the original
    string is returned unchanged.

    Args:
        key: Attribute name, used only in debug log messages.
        value: The raw string to decode.

    Returns:
        The decoded object, or ``value`` itself when decoding fails.
    """
    try:
        return ast.literal_eval(value)
    # These are the exception types ast.literal_eval documents for malformed input.
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
        logger.debug("Failed to parse %s with ast.literal_eval: %s", key, e)

    try:
        return json.loads(value)
    # json.JSONDecodeError is a ValueError subclass; deep nesting may raise RecursionError.
    except (ValueError, RecursionError) as e:
        logger.debug("Failed to parse %s with json.loads: %s", key, e)

    return value


def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.

    The first observation whose ``name`` is ``"raw_gen_ai_request"`` and whose
    ``type`` is ``"SPAN"`` and that carries a non-empty ``metadata.attributes``
    mapping wins. String attribute values starting with ``[`` or ``{`` are
    decoded (Python literal first, then JSON); every other value is copied
    through unchanged.

    Malformed input is skipped rather than raised on: ``observations`` may be
    ``None``, and entries that are not dicts, or whose ``metadata`` /
    ``attributes`` are not mappings, are ignored.

    Args:
        observations: List of observation dictionaries from the trace

    Returns:
        Dict with all attributes parsed. Or None if not found.
    """
    for obs in observations or []:
        if not isinstance(obs, dict):
            continue
        if obs.get("name") != "raw_gen_ai_request" or obs.get("type") != "SPAN":
            continue

        metadata = obs.get("metadata") or {}
        attributes = metadata.get("attributes") if isinstance(metadata, dict) else None
        # An empty or missing attribute mapping yields nothing; keep looking at
        # later observations instead of returning an empty result.
        if not isinstance(attributes, dict) or not attributes:
            continue

        result: Dict[str, Any] = {}
        for key, value in attributes.items():
            # Try to parse stringified objects (could be Python repr or JSON)
            if isinstance(value, str) and value.startswith(("[", "{")):
                result[key] = _parse_stringified_attribute(key, value)
            else:
                result[key] = value
        return result

    return None
84-
85-
8647def convert_trace_dict_to_evaluation_row (
8748 trace : Dict [str , Any ], include_tool_calls : bool = True , span_name : Optional [str ] = None
8849) -> Optional [EvaluationRow ]:
@@ -135,14 +96,6 @@ def convert_trace_dict_to_evaluation_row(
13596 ):
13697 break # Break early if we've found all the metadata we need
13798
138- observations = trace .get ("observations" ) or []
139- # We can only extract when stored in OTEL format.
140- openai_response = extract_openai_response (observations )
141- if openai_response :
142- choices = openai_response .get ("llm.openai.choices" )
143- if choices and len (choices ) > 0 :
144- execution_metadata .finish_reason = choices [0 ].get ("finish_reason" )
145-
14699 return EvaluationRow (
147100 messages = messages ,
148101 tools = tools ,
@@ -207,7 +160,7 @@ def extract_messages_from_trace_dict(
207160 # Fallback: use the last GENERATION observation which typically contains full chat history
208161 if not messages :
209162 try :
210- all_observations = trace .get ("observations" ) or []
163+ all_observations = trace .get ("observations" , [])
211164 gens = [obs for obs in all_observations if obs .get ("type" ) == "GENERATION" ]
212165 if gens :
213166 gens .sort (key = lambda x : x .get ("start_time" , "" ))
@@ -233,7 +186,7 @@ def get_final_generation_in_span_dict(trace: Dict[str, Any], span_name: str) ->
233186 The final generation dictionary, or None if not found
234187 """
235188 # Get all observations from the trace
236- all_observations = trace .get ("observations" ) or []
189+ all_observations = trace .get ("observations" , [])
237190
238191 # Find a span with the given name that has generation children
239192 parent_span = None
0 commit comments