Lilac ML integration for Eval Protocol.

This adapter provides utilities for converting between EvaluationRow format
and pandas DataFrame format, enabling integration with Lilac for data curation:
- Clustering and deduplication
- Semantic search and filtering
- Quality scoring with embeddings
- Interactive data exploration

Example usage:
    >>> from eval_protocol.adapters.lilac import (
    ...     evaluation_rows_to_dataframe,
    ...     dataframe_to_evaluation_rows,
    ... )
    >>>
    >>> # Convert EvaluationRows to DataFrame for Lilac
    >>> df = evaluation_rows_to_dataframe(rows)
    >>> df['user_query'] = df['messages_json'].apply(extract_user_message)
    >>>
    >>> # Use with Lilac for clustering
    >>> import lilac as ll
    >>> dataset = ll.create_dataset(ll.DatasetConfig(
    ...     namespace='local', name='my-data', source=ll.PandasSource(df)
    ... ))
    >>> dataset.cluster('user_query')
    >>>
    >>> # Convert back to EvaluationRows
    >>> processed_df = dataset.to_pandas(include_signals=True)
    >>> processed_rows = dataframe_to_evaluation_rows(processed_df)
"""
2932
3033from __future__ import annotations
3134
3235import json
3336import logging
34- from typing import Any , TYPE_CHECKING
37+ from typing import Any
3538
3639import pandas as pd
3740
4346 Message ,
4447)
4548
46- if TYPE_CHECKING :
47- import lilac as ll
48-
4949logger = logging .getLogger (__name__ )
5050
51- # Check if lilac is available
52- try :
53- import lilac as ll
54-
55- LILAC_AVAILABLE = True
56- except ImportError :
57- LILAC_AVAILABLE = False
58- ll = None # type: ignore
59-
60-
61- def _ensure_lilac_available () -> None :
62- """Raise ImportError if lilac is not installed."""
63- if not LILAC_AVAILABLE :
64- raise ImportError ("Lilac is not installed. Install it with: pip install 'lilac[all]'" )
65-
6651
# =============================================================================
# Internal Helpers
# =============================================================================
7055
7156
@@ -86,13 +71,13 @@ def _deserialize_messages(messages_json: str | None) -> list[Message]:
8671 return []
8772
8873
89- def evaluation_row_to_dict (row : EvaluationRow ) -> dict [str , Any ]:
90- """Convert a single EvaluationRow to a dictionary for Lilac .
74+ def _evaluation_row_to_dict (row : EvaluationRow ) -> dict [str , Any ]:
75+ """Convert a single EvaluationRow to a dictionary.
9176
9277 The output contains JSON-serialized fields that can be reconstructed back
9378 to EvaluationRow. Users can add their own text columns for clustering.
9479 """
95- result : dict [ str , Any ] = {
80+ return {
9681 # Identifiers
9782 "row_id" : row .input_metadata .row_id if row .input_metadata else None ,
9883 # Full data as JSON (for reconstruction)
@@ -108,11 +93,9 @@ def evaluation_row_to_dict(row: EvaluationRow) -> dict[str, Any]:
10893 "has_tools" : bool (row .tools ),
10994 }
11095
111- return result
11296
113-
114- def dict_to_evaluation_row (data : dict [str , Any ]) -> EvaluationRow :
115- """Convert a Lilac row dictionary back to an EvaluationRow."""
97+ def _dict_to_evaluation_row (data : dict [str , Any ]) -> EvaluationRow :
98+ """Convert a dictionary back to an EvaluationRow."""
11699 # Parse messages
117100 messages = _deserialize_messages (data .get ("messages_json" ))
118101
@@ -167,97 +150,38 @@ def dict_to_evaluation_row(data: dict[str, Any]) -> EvaluationRow:
167150
168151
# =============================================================================
# Public API
# =============================================================================
172155
173156
def evaluation_rows_to_dataframe(rows: list[EvaluationRow]) -> pd.DataFrame:
    """Convert EvaluationRows to a pandas DataFrame.

    The DataFrame can be used directly with Lilac for clustering and curation.

    Args:
        rows: List of EvaluationRow objects

    Returns:
        DataFrame with JSON-serialized fields for reconstruction
    """
    # One record per row; the per-row field layout is owned by
    # _evaluation_row_to_dict, so the two stay in sync automatically.
    return pd.DataFrame([_evaluation_row_to_dict(row) for row in rows])
216170
217171
218- def lilac_dataset_to_evaluation_rows (
219- dataset : Any ,
220- filters : list [tuple [str , str , Any ]] | None = None ,
221- limit : int | None = None ,
222- ) -> list [EvaluationRow ]:
223- """Convert a Lilac dataset back to EvaluationRows.
172+ def dataframe_to_evaluation_rows (df : pd .DataFrame ) -> list [EvaluationRow ]:
173+ """Convert a pandas DataFrame back to EvaluationRows.
224174
225175 Args:
226- dataset: Lilac Dataset object
227- filters: Optional Lilac filter tuples, e.g. [('score', 'greater', 0.5)]
228- limit: Maximum number of rows to return
176+ df: DataFrame with messages_json and other serialized fields
229177
230178 Returns:
231179 List of EvaluationRow objects
232180 """
233- _ensure_lilac_available ()
234-
235- # Build query
236- kwargs : dict [str , Any ] = {}
237- if filters :
238- kwargs ["filters" ] = filters
239- if limit :
240- kwargs ["limit" ] = limit
241-
242- df = dataset .select_rows (** kwargs ).df ()
243- return dataframe_to_evaluation_rows (df )
244-
245-
246- def evaluation_rows_to_dataframe (rows : list [EvaluationRow ]) -> pd .DataFrame :
247- """Convert EvaluationRows to a pandas DataFrame.
248-
249- Useful if you want to work with the DataFrame directly.
250- """
251- records = [evaluation_row_to_dict (row ) for row in rows ]
252- return pd .DataFrame (records )
253-
254-
255- def dataframe_to_evaluation_rows (df : pd .DataFrame ) -> list [EvaluationRow ]:
256- """Convert a pandas DataFrame back to EvaluationRows."""
257181 rows = []
258182 for _ , row_data in df .iterrows ():
259183 try :
260- row = dict_to_evaluation_row (row_data .to_dict ())
184+ row = _dict_to_evaluation_row (row_data .to_dict ())
261185 rows .append (row )
262186 except Exception as e :
263187 logger .warning (f"Failed to convert row: { e } " )
0 commit comments