1+ import { EvaluationRow , Message } from './eval-protocol' ;
2+
/**
 * Utility functions for working with EvaluationRow data.
 * These mirror the methods of the Python EvaluationRow class.
 */
7+
/**
 * Reads `key` from `source` when `source` is a non-null object; yields
 * `undefined` for anything else. Used to inspect loosely typed payloads
 * (control_plane_step, input_metadata) without resorting to `any` casts.
 */
const readField = (source: unknown, key: string): unknown =>
  typeof source === 'object' && source !== null
    ? (source as Record<string, unknown>)[key]
    : undefined;

export const evalRowUtils = {
  /**
   * Tells whether the row represents a trajectory evaluation.
   *
   * @returns `true` when `evaluation_result.step_outputs` is a non-empty list;
   *   `false` otherwise, including when either value is `null` or `undefined`.
   */
  isTrajectoryEvaluation: (row: EvaluationRow): boolean => {
    // Optional chaining covers both `undefined` and `null` (a `!== undefined`
    // check alone lets `null` through and then throws on property access).
    return (row.evaluation_result?.step_outputs?.length ?? 0) > 0;
  },

  /**
   * Returns the number of messages in the conversation.
   */
  getConversationLength: (row: EvaluationRow): number => {
    return row.messages.length;
  },

  /**
   * Returns the first message whose role is 'system'.
   *
   * @returns The first system message, or a fresh `{ role: 'system', content: '' }`
   *   placeholder when the conversation has none.
   */
  getSystemMessage: (row: EvaluationRow): Message => {
    return row.messages.find(msg => msg.role === 'system') ?? { role: 'system', content: '' };
  },

  /**
   * Returns only the messages whose role is 'assistant', in conversation order.
   */
  getAssistantMessages: (row: EvaluationRow): Message[] => {
    return row.messages.filter(msg => msg.role === 'assistant');
  },

  /**
   * Returns only the messages whose role is 'user', in conversation order.
   */
  getUserMessages: (row: EvaluationRow): Message[] => {
    return row.messages.filter(msg => msg.role === 'user');
  },

  /**
   * Looks up a single value in `input_metadata`.
   *
   * @param row - The row whose metadata is read.
   * @param key - Property name to read from `input_metadata`.
   * @param defaultValue - Returned when `input_metadata` is missing or the
   *   stored value is `null`/`undefined`. Falsy values such as `0` or `''`
   *   are returned as stored.
   */
  getInputMetadata: (row: EvaluationRow, key: string, defaultValue?: any): any => {
    return readField(row.input_metadata, key) ?? defaultValue;
  },

  /**
   * Counts the messages that carry a truthy `control_plane_step`.
   */
  getSteps: (row: EvaluationRow): number => {
    return row.messages.filter(msg => msg.control_plane_step).length;
  },

  /**
   * Sums the numeric `reward` fields found in the messages' `control_plane_step` data.
   *
   * @returns The sum of all rewards; messages without a `control_plane_step`
   *   or with a non-numeric `reward` contribute nothing. `0` when no message
   *   qualifies.
   */
  getTotalReward: (row: EvaluationRow): number => {
    return row.messages.reduce((total, msg) => {
      const reward = readField(msg.control_plane_step, 'reward');
      return typeof reward === 'number' ? total + reward : total;
    }, 0);
  },

  /**
   * Tells whether any message's `control_plane_step` has `terminated` set to
   * exactly `true`.
   */
  getTerminated: (row: EvaluationRow): boolean => {
    return row.messages.some(msg => readField(msg.control_plane_step, 'terminated') === true);
  },

  /**
   * Returns the termination reason from the last message whose
   * `control_plane_step` carries one.
   *
   * @returns The last non-empty string `termination_reason` found when
   *   scanning the conversation from the end, or `'unknown'` when none exists.
   */
  getTerminationReason: (row: EvaluationRow): string => {
    // Walk backwards so the most recent reason wins.
    for (let i = row.messages.length - 1; i >= 0; i--) {
      const reason = readField(row.messages[i]?.control_plane_step, 'termination_reason');
      // Only a non-empty string honours the declared return type.
      if (typeof reason === 'string' && reason !== '') {
        return reason;
      }
    }
    return 'unknown';
  },
};
111+
/**
 * Utility functions for working with Message data.
 */
export const messageUtils = {
  /**
   * Checks whether a message carries at least one tool call.
   *
   * @returns `true` when `tool_calls` is a non-empty list; `false` when it is
   *   empty, `null`, or `undefined`.
   */
  hasToolCalls: (message: Message): boolean => {
    // `?.` guards against `null` as well as `undefined`; a bare
    // `!== undefined` check would throw on `tool_calls: null`.
    return (message.tool_calls?.length ?? 0) > 0;
  },

  /**
   * Checks whether a message carries a function call.
   *
   * @returns `true` when `function_call` is neither `null` nor `undefined`.
   */
  hasFunctionCall: (message: Message): boolean => {
    // Loose `!= null` deliberately matches both `null` and `undefined`.
    return message.function_call != null;
  },

  /**
   * Returns the message content as a single string.
   *
   * String content is returned unchanged. Array content is reduced to the
   * `text` of every part whose `type` is 'text', concatenated without a
   * separator. Any other content yields the empty string.
   */
  getContentAsString: (message: Message): string => {
    const { content } = message;
    if (typeof content === 'string') {
      return content;
    }
    if (!Array.isArray(content)) {
      return '';
    }
    return content
      .filter(part => part.type === 'text')
      .map(part => part.text)
      .join('');
  },
};
146+
/**
 * Utility functions for working with EvaluateResult data.
 */
/**
 * Extracts `step_outputs` from an evaluation result as an array.
 * Yields an empty array when the result itself is `null`/`undefined` or when
 * `step_outputs` is missing, `null`, or not an array.
 */
const stepOutputsOf = (result: unknown): unknown[] => {
  const steps = (result as { step_outputs?: unknown } | null | undefined)?.step_outputs;
  return Array.isArray(steps) ? steps : [];
};

export const evaluateResultUtils = {
  /**
   * Checks whether the evaluation result has step outputs (trajectory evaluation).
   *
   * @returns `true` when `step_outputs` is a non-empty array; `false`
   *   otherwise, including for a `null`/`undefined` result or `step_outputs`.
   */
  hasStepOutputs: (result: any): boolean => {
    return stepOutputsOf(result).length > 0;
  },

  /**
   * Sums `base_reward` across all step outputs.
   *
   * @returns The total of every numeric, non-NaN `base_reward`; steps without
   *   one (or with a non-numeric value) contribute `0`. Returns `0` when
   *   there are no step outputs.
   */
  getTotalBaseReward: (result: any): number => {
    return stepOutputsOf(result).reduce((total: number, step: unknown) => {
      const reward = (step as { base_reward?: unknown } | null | undefined)?.base_reward;
      // Only real numbers are added, so a string reward cannot turn the sum
      // into string concatenation and NaN cannot poison the total.
      return typeof reward === 'number' && !Number.isNaN(reward) ? total + reward : total;
    }, 0);
  },

  /**
   * Returns the number of step outputs, or `0` when there are none.
   */
  getStepCount: (result: any): number => {
    return stepOutputsOf(result).length;
  },
};
0 commit comments