Skip to content

Commit 5998d83

Browse files
committed
updated with fireworks
1 parent ff210d4 commit 5998d83

1 file changed

Lines changed: 16 additions & 5 deletions

File tree

examples/deepeval/test_geval_with_logprobs.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66
parameters through ``completion_params``. Logs are written to
77
``~/.eval_protocol/datasets/<YYYY-MM-DD>.jsonl`` via the local filesystem
88
logger so you can inspect the captured logprobs directly.
9+
10+
Environment variables:
11+
FIREWORKS_API_KEY - Required for Fireworks models
912
"""
1013

14+
import os
1115
from typing import List
1216

1317
from eval_protocol.dataset_logger.local_fs_dataset_logger_adapter import LocalFSDatasetLoggerAdapter
@@ -17,29 +21,36 @@
1721

1822
try: # pragma: no cover - optional dependency for the example
1923
from deepeval.metrics import GEval
24+
from deepeval.models import LiteLLMModel
2025
from deepeval.test_case import LLMTestCaseParams
2126
except ImportError as exc: # pragma: no cover - optional dependency for the example
2227
raise ImportError("Install deepeval to run this example: pip install deepeval") from exc
2328

29+
# Judge model for GEval: DeepSeek v3.2 served by Fireworks, accessed through
# LiteLLM. GEval normalizes its score using token logprobs, so the
# "top_logprobs" parameter must be explicitly whitelisted via
# allowed_openai_params for the Fireworks provider to accept it.
judge_model = LiteLLMModel(
    model="fireworks_ai/accounts/fireworks/models/deepseek-v3p2",
    api_key=os.environ.get("FIREWORKS_API_KEY"),  # see module docstring: required env var
    allowed_openai_params=["top_logprobs"],
)
36+
2437
# Build the GEval metric first, then adapt it so eval_protocol can score the
# assistant response with the full chat context.
_geval_metric = GEval(
    name="Helpful & Relevant",
    criteria="Evaluate the helpfulness and relevance of the model output.",
    evaluation_params=[LLMTestCaseParams.INPUT, LLMTestCaseParams.ACTUAL_OUTPUT],
    model=judge_model,
    top_logprobs=5,  # Fireworks max is 5 (default is 20)
)
wrapped_metric = adapt_metric(_geval_metric)
3247

3348

3449
@evaluation_test(
3550
input_rows=[[EvaluationRow(messages=[{"role": "user", "content": "Say hello politely."}])]],
3651
completion_params=[
37-
{"model": "gpt-3.5-turbo", "logprobs": True, "top_logprobs": 3},
3852
{
39-
"model": "accounts/fireworks/models/qwen3-8b",
40-
"logprobs": True,
41-
"api_base": "https://api.fireworks.ai/inference/v1",
42-
"custom_llm_provider": "fireworks_ai",
53+
"model": "fireworks_ai/accounts/fireworks/models/deepseek-v3p2",
4354
},
4455
],
4556
logger=LocalFSDatasetLoggerAdapter(),

0 commit comments

Comments
 (0)