@@ -37,36 +37,36 @@ except ImportError:
3737
3838class YourCustomAdapter :
3939 """ Adapter for integrating with Your Custom Data Source.
40-
40+
4141 This adapter loads data from Your Custom Data Source and converts it
4242 to EvaluationRow format for use in evaluation pipelines.
43-
43+
4444 Examples:
4545 Basic usage:
4646 >>> adapter = YourCustomAdapter(api_key="your_key")
4747 >>> rows = list(adapter.get_evaluation_rows(limit=10))
4848 """
49-
49+
5050 def __init__ (self , ** config ):
5151 """ Initialize the adapter with configuration."""
5252 if not DEPENDENCY_AVAILABLE :
5353 raise ImportError (" your_external_library not installed" )
54-
54+
5555 # Initialize your client/connection here
5656 self .client = your_external_library.Client(** config)
57-
57+
5858 def get_evaluation_rows (self , ** kwargs ) -> Iterator[EvaluationRow]:
5959 """ Main method to fetch and convert data to EvaluationRow format.
60-
60+
6161 Args:
6262 **kwargs: Adapter-specific parameters
63-
63+
6464 Yields:
6565 EvaluationRow: Converted evaluation rows
6666 """
6767 # Implement your data fetching logic
6868 raw_data = self .client.fetch_data(** kwargs)
69-
69+
7070 for item in raw_data:
7171 try :
7272 eval_row = self ._convert_to_evaluation_row(item)
@@ -75,51 +75,51 @@ class YourCustomAdapter:
7575 except Exception as e:
7676 logger.warning(f " Failed to convert item: { e} " )
7777 continue
78-
78+
7979 def _convert_to_evaluation_row (self , raw_item : Any) -> Optional[EvaluationRow]:
8080 """ Convert a raw data item to EvaluationRow format.
81-
81+
8282 Args:
8383 raw_item: Raw data item from your source
84-
84+
8585 Returns:
8686 EvaluationRow or None if conversion fails
8787 """
8888 # Extract messages from your data format
8989 messages = self ._extract_messages(raw_item)
90-
90+
9191 # Extract metadata
9292 input_metadata = self ._create_input_metadata(raw_item)
93-
93+
9494 # Extract ground truth if available
9595 ground_truth = self ._extract_ground_truth(raw_item)
96-
96+
9797 # Extract tools if available (for tool calling scenarios)
9898 tools = self ._extract_tools(raw_item)
99-
99+
100100 return EvaluationRow(
101101 messages = messages,
102102 tools = tools,
103103 input_metadata = input_metadata,
104104 ground_truth = ground_truth,
105105 )
106-
106+
107107 def _extract_messages (self , raw_item : Any) -> List[Message]:
108108 """ Extract conversation messages from raw data."""
109109 # Implement message extraction logic
110110 # Convert your data format to List[Message]
111111 pass
112-
112+
113113 def _create_input_metadata (self , raw_item : Any) -> InputMetadata:
114114 """ Create InputMetadata from raw data."""
115115 # Implement metadata extraction
116116 pass
117-
117+
118118 def _extract_ground_truth (self , raw_item : Any) -> Optional[str ]:
119119 """ Extract ground truth if available."""
120120 # Implement ground truth extraction
121121 pass
122-
122+
123123 def _extract_tools (self , raw_item : Any) -> Optional[List[Dict[str , Any]]]:
124124 """ Extract tool definitions if available."""
125125 # Implement tool extraction for tool calling scenarios
@@ -149,7 +149,7 @@ message = Message(
149149 content = " I'll help you with that calculation." ,
150150 tool_calls = [{
151151 " id" : " call_123" ,
152- " type" : " function" ,
152+ " type" : " function" ,
153153 " function" : {
154154 " name" : " calculate" ,
155155 " arguments" : ' {"x": 5, "y": 3}'
@@ -185,7 +185,7 @@ input_metadata = InputMetadata(
185185 },
186186 session_data = {
187187 " user_id" : " user123" ,
188- " session_id" : " session456" ,
188+ " session_id" : " session456" ,
189189 " timestamp" : " 2024-01-01T00:00:00Z" ,
190190 }
191191)
@@ -259,7 +259,7 @@ def get_evaluation_rows(self, **kwargs) -> Iterator[EvaluationRow]:
259259 except Exception as e:
260260 logger.error(f " Failed to fetch data: { e} " )
261261 return
262-
262+
263263 for item in data:
264264 try :
265265 row = self ._convert_to_evaluation_row(item)
@@ -298,36 +298,36 @@ from eval_protocol.models import EvaluationRow
298298
class TestYourCustomAdapter:
    """Test suite for YourCustomAdapter."""

    def test_initialization(self):
        """Test adapter initialization."""
        instance = YourCustomAdapter(api_key="test_key")
        assert instance.client is not None

    def test_get_evaluation_rows(self):
        """Test conversion to EvaluationRow format."""
        instance = YourCustomAdapter(api_key="test_key")

        # Mock the external API response
        with patch.object(instance.client, "fetch_data") as fake_fetch:
            fake_fetch.return_value = [
                # Mock data in your format
                {"id": "1", "question": "Test?", "answer": "Yes"},
            ]

            converted = list(instance.get_evaluation_rows(limit=1))

        assert len(converted) == 1
        assert isinstance(converted[0], EvaluationRow)
        assert len(converted[0].messages) > 0

    def test_error_handling(self):
        """Test error handling."""
        instance = YourCustomAdapter(api_key="test_key")

        with patch.object(instance.client, "fetch_data") as fake_fetch:
            fake_fetch.side_effect = Exception("API Error")

            converted = list(instance.get_evaluation_rows())

        # Should handle error gracefully
        assert len(converted) == 0
333333```
@@ -341,18 +341,18 @@ For simple chat data:
341341``` python
def _extract_messages(self, conversation: Dict) -> List[Message]:
    """Build the message list for a simple chat conversation."""
    result: List[Message] = []

    # Prepend the system prompt when the record carries one
    prompt = conversation.get('system_prompt')
    if prompt:
        result.append(Message(role="system", content=prompt))

    # Each conversation turn maps one-to-one onto a Message
    result.extend(
        Message(role=turn['role'], content=turn['content'])
        for turn in conversation['turns']
    )

    return result
357357```
358358
@@ -363,27 +363,27 @@ For tool calling scenarios:
363363``` python
def _extract_messages(self, trace: Dict) -> List[Message]:
    """Convert the steps of an agent trace into chat messages."""
    out: List[Message] = []

    for step in trace['steps']:
        kind = step['type']

        if kind == 'user_message':
            out.append(Message(role="user", content=step['content']))
            continue

        if kind == 'assistant_message':
            reply = Message(role="assistant", content=step.get('content'))

            # Attach tool calls only when the step recorded any
            calls = step.get('tool_calls')
            if calls:
                reply.tool_calls = calls

            out.append(reply)
            continue

        if kind == 'tool_response':
            out.append(Message(
                role="tool",
                content=step['content'],
                tool_call_id=step['tool_call_id'],
            ))

    return out
388388```
389389
@@ -515,10 +515,10 @@ Here are some potential adapters that would be valuable:
515515
516516- ** OpenAI Evals** : Load data from OpenAI's evals repository
517517- ** LLM Evaluation Datasets** : MMLU, HellaSwag, etc.
518- - ** Chat Platforms** : Discord, Slack conversation exports
518+ - ** Chat Platforms** : Discord, Slack conversation exports
519519- ** Monitoring Tools** : Other observability platforms
520520- ** Custom APIs** : Company-specific data sources
521521- ** File Formats** : Parquet, Excel, database exports
522522- ** Research Datasets** : Academic benchmarks and competitions
523523
524- We welcome contributions for any of these or other creative integrations!
524+ We welcome contributions for any of these or other creative integrations!