|
6 | 6 |
|
7 | 7 | from langfuse.api.resources.commons.types.observations_view import ObservationsView |
8 | 8 | import logging |
| 9 | +import random |
| 10 | +import time |
9 | 11 | from datetime import datetime, timedelta |
10 | 12 | from typing import Any, Dict, Iterator, List, Optional, cast |
11 | 13 |
|
@@ -59,54 +61,154 @@ def __init__(self): |
def get_evaluation_rows(
    self,
    limit: int = 100,
    sample_size: int = 50,
    tags: Optional[List[str]] = None,
    user_id: Optional[str] = None,
    session_id: Optional[str] = None,
    hours_back: Optional[int] = None,
    from_timestamp: Optional[datetime] = None,
    to_timestamp: Optional[datetime] = None,
    include_tool_calls: bool = True,
    sleep_between_gets: float = 2.5,
    max_retries: int = 3,
) -> List[EvaluationRow]:
    """Pull traces from Langfuse and convert to EvaluationRow format.

    Works in two phases: first page through ``trace.list()`` to collect up to
    ``limit`` trace summaries, then randomly sample ``sample_size`` of them and
    call ``trace.get()`` for each sampled trace before converting it.

    Args:
        limit: Max number of trace summaries to collect via pagination (pre-sampling)
        sample_size: Number of traces to fetch full details for (sampled from
            collected summaries); values below zero are treated as zero
        tags: Filter by specific tags
        user_id: Filter by user ID
        session_id: Filter by session ID
        hours_back: Filter traces from this many hours ago; only used when
            neither from_timestamp nor to_timestamp is given
        from_timestamp: Explicit start time (overrides hours_back)
        to_timestamp: Explicit end time (overrides hours_back)
        include_tool_calls: Whether to include tool calling traces
        sleep_between_gets: Seconds to sleep before each individual trace.get()
            call (2.5s for 30 req/min limit)
        max_retries: Maximum number of attempts per API call when rate limited;
            at least one attempt is always made

    Returns:
        List[EvaluationRow]: Converted evaluation rows. Traces that cannot be
        fetched or converted are logged and skipped, so the list may be shorter
        than ``sample_size``.
    """
    eval_rows: List[EvaluationRow] = []

    # Always try each call at least once, even if max_retries is 0 or negative.
    attempts_allowed = max(1, max_retries)

    def _call_with_rate_limit_retry(description, func, *args, **kwargs):
        """Call func; on a 429 back off exponentially and retry, otherwise re-raise."""
        for attempt in range(1, attempts_allowed + 1):
            try:
                return func(*args, **kwargs)
            except Exception as exc:
                # Prefer a structured status code when the exception exposes one;
                # the text match is only a fallback because "429" can appear in
                # unrelated messages (e.g. inside a trace id).
                # NOTE(review): assumes Langfuse API errors carry `status_code` - confirm.
                status = getattr(exc, "status_code", None)
                rate_limited = status == 429 if status is not None else "429" in str(exc)
                if not rate_limited or attempt >= attempts_allowed:
                    raise
                sleep_time = 2**attempt  # Exponential backoff
                logger.warning(
                    "Rate limit hit on %s, retrying in %ds (attempt %d/%d)",
                    description,
                    sleep_time,
                    attempt,
                    attempts_allowed,
                )
                time.sleep(sleep_time)

    # Determine time window: explicit from/to takes precedence over hours_back
    if from_timestamp is None and to_timestamp is None and hours_back:
        # NOTE(review): naive local time; confirm whether the API expects UTC.
        to_timestamp = datetime.now()
        from_timestamp = to_timestamp - timedelta(hours=hours_back)

    # Collect trace summaries via pagination (up to limit)
    all_traces: List[Any] = []
    page = 1

    while len(all_traces) < limit:
        remaining = limit - len(all_traces)
        current_page_limit = min(100, remaining)  # Langfuse API max is 100

        logger.debug(
            "Fetching page %d with limit %d (collected: %d/%d)", page, current_page_limit, len(all_traces), limit
        )

        try:
            traces = _call_with_rate_limit_retry(
                "trace.list()",
                self.client.api.trace.list,
                page=page,
                limit=current_page_limit,
                tags=tags,
                user_id=user_id,
                session_id=session_id,
                from_timestamp=from_timestamp,
                to_timestamp=to_timestamp,
                order_by="timestamp.desc",
            )
        except Exception as e:
            # Stop paginating but keep the summaries from earlier pages so the
            # caller still gets rows for whatever was collected.
            logger.error(
                "Failed to fetch trace list page %d, continuing with %d collected traces: %s",
                page,
                len(all_traces),
                e,
            )
            break

        if not traces or not traces.data:
            logger.debug("No more traces found on page %d", page)
            break

        page_size = len(traces.data)
        logger.debug("Collected %d traces from page %d", page_size, page)

        # Never keep more than requested, even if the server over-delivers.
        all_traces.extend(traces.data[:remaining])

        # Check if we have more pages
        meta = getattr(traces, "meta", None)
        if hasattr(meta, "page") and hasattr(meta, "total_pages"):
            if meta.page >= meta.total_pages:
                break
        elif page_size < current_page_limit:
            # No pagination metadata: a short page means this was the last one.
            break

        page += 1

    if not all_traces:
        logger.debug("No traces found")
        return eval_rows

    # Randomly sample traces to fetch full details (respect rate limits).
    # Clamp at zero because random.sample raises ValueError on a negative size.
    actual_sample_size = max(0, min(sample_size, len(all_traces)))
    selected_traces = random.sample(all_traces, actual_sample_size)

    logger.debug("Randomly selected %d traces from %d collected", actual_sample_size, len(all_traces))

    # Process each selected trace with sleep and retry logic
    for trace_info in selected_traces:
        # Sleep before every get (including the first, which follows the list
        # calls) to stay under the rate limit.
        if sleep_between_gets > 0:
            time.sleep(sleep_between_gets)

        try:
            trace_full = _call_with_rate_limit_retry(
                "trace.get(%s)" % trace_info.id,
                self.client.api.trace.get,
                trace_info.id,
            )
        except Exception as e:
            logger.warning("Failed to fetch trace %s after %d attempts: %s", trace_info.id, attempts_allowed, e)
            continue  # Skip this trace

        if not trace_full:
            continue

        try:
            eval_row = self._convert_trace_to_evaluation_row(trace_full, include_tool_calls)
        except (AttributeError, ValueError, KeyError) as e:
            logger.warning("Failed to convert trace %s: %s", trace_info.id, e)
            continue

        if eval_row:
            eval_rows.append(eval_row)

    logger.info(
        "Successfully processed %d selected traces into %d evaluation rows", len(selected_traces), len(eval_rows)
    )
    return eval_rows
111 | 213 |
|
112 | 214 | def get_evaluation_rows_by_ids( |
|
0 commit comments