|
6 | 6 | parameters through ``completion_params``. Logs are written to |
7 | 7 | ``~/.eval_protocol/datasets/<YYYY-MM-DD>.jsonl`` via the local filesystem |
8 | 8 | logger so you can inspect the captured logprobs directly. |
| 9 | +
|
| 10 | +Environment variables: |
| 11 | + FIREWORKS_API_KEY - Required; API key for the Fireworks-hosted models (read from the environment for the GEval judge) |
9 | 12 | """ |
10 | 13 |
|
| 14 | +import os |
11 | 15 | from typing import List |
12 | 16 |
|
13 | 17 | from eval_protocol.dataset_logger.local_fs_dataset_logger_adapter import LocalFSDatasetLoggerAdapter |
|
17 | 21 |
|
18 | 22 | try: # pragma: no cover - optional dependency for the example |
19 | 23 | from deepeval.metrics import GEval |
| 24 | + from deepeval.models import LiteLLMModel |
20 | 25 | from deepeval.test_case import LLMTestCaseParams |
21 | 26 | except ImportError as exc: # pragma: no cover - optional dependency for the example |
22 | 27 | raise ImportError("Install deepeval to run this example: pip install deepeval") from exc |
23 | 28 |
|
| 29 | +# Use DeepSeek via Fireworks for the GEval judge model |
| 30 | +# Note: We need allowed_openai_params to enable top_logprobs for GEval's score normalization |
| 31 | +judge_model = LiteLLMModel( |
| 32 | + model="fireworks_ai/accounts/fireworks/models/deepseek-v3p2", |
| 33 | + api_key=os.environ.get("FIREWORKS_API_KEY"), |
| 34 | + allowed_openai_params=["top_logprobs"], # Enable logprobs for GEval normalization |
| 35 | +) |
| 36 | + |
24 | 37 | # Configure GEval to judge the assistant response with the full chat context. |
25 | 38 | wrapped_metric = adapt_metric( |
26 | 39 | GEval( |
27 | 40 | name="Helpful & Relevant", |
28 | 41 | criteria="Evaluate the helpfulness and relevance of the model output.", |
29 | 42 | evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT], |
| 43 | + model=judge_model, |
| 44 | + top_logprobs=5, # Fireworks caps top_logprobs at 5 (GEval's default is 20)
30 | 45 | ) |
31 | 46 | ) |
32 | 47 |
|
33 | 48 |
|
34 | 49 | @evaluation_test( |
35 | 50 | input_rows=[[EvaluationRow(messages=[{"role": "user", "content": "Say hello politely."}])]], |
36 | 51 | completion_params=[ |
37 | | - {"model": "gpt-3.5-turbo", "logprobs": True, "top_logprobs": 3}, |
38 | 52 | { |
39 | | - "model": "accounts/fireworks/models/qwen3-8b", |
40 | | - "logprobs": True, |
41 | | - "api_base": "https://api.fireworks.ai/inference/v1", |
42 | | - "custom_llm_provider": "fireworks_ai", |
| 53 | + "model": "fireworks_ai/accounts/fireworks/models/deepseek-v3p2", |
43 | 54 | }, |
44 | 55 | ], |
45 | 56 | logger=LocalFSDatasetLoggerAdapter(), |
|
0 commit comments