|
| 1 | +# MANUAL SETUP REQUIRED: |
| 2 | +# |
| 3 | +# For GitHub Actions testing, you need: |
| 4 | +# 1. GitHub repository with rollout.yml workflow (see .github/workflows/rollout.yml) |
| 5 | +# 2. Repository secrets configured: FIREWORKS_API_KEY |
| 6 | +# 3. Environment variables: GITHUB_TOKEN (with repo and workflow permissions) |
| 7 | +# |
| 8 | +# The GitHub Actions workflow should accept model, metadata, and model_base_url inputs |
| 9 | +# and include: run-name: rollout:${{ fromJSON(inputs.metadata).rollout_id }} |
| 10 | + |
| 11 | +import os |
| 12 | +from typing import List |
| 13 | + |
| 14 | +import pytest |
| 15 | + |
| 16 | +from eval_protocol.data_loader.dynamic_data_loader import DynamicDataLoader |
| 17 | +from eval_protocol.models import EvaluationRow, Message |
| 18 | +from eval_protocol.pytest import evaluation_test |
| 19 | +from eval_protocol.pytest.github_action_rollout_processor import GithubActionRolloutProcessor |
| 20 | + |
| 21 | + |
def rows() -> List[EvaluationRow]:
    """Generate the input dataset: one single-turn prompt listed three times.

    The returned list holds three references to the *same* ``EvaluationRow``
    instance rather than three independent copies.
    """
    question = Message(role="user", content="What is the capital of France?")
    shared_row = EvaluationRow(messages=[question])
    return [shared_row] * 3
| 25 | + |
| 26 | + |
@pytest.mark.skipif(os.getenv("CI") == "true", reason="Only run this test locally (skipped in CI)")
@pytest.mark.parametrize(
    "completion_params",
    [{"model": "fireworks_ai/accounts/fireworks/models/gpt-oss-120b"}],
)
@evaluation_test(
    data_loaders=DynamicDataLoader(generators=[rows]),
    rollout_processor=GithubActionRolloutProcessor(
        owner="eval-protocol",
        repo="python-sdk",
        workflow_id="rollout.yml",  # a numeric workflow ID such as "12345678" also works
        ref=os.environ.get("GITHUB_REF", "main"),
        poll_interval=3.0,  # multi-turn rollouts will likely want a higher poll interval
        timeout_seconds=300,
    ),
)
async def test_github_actions_quickstart(row: EvaluationRow) -> EvaluationRow:
    """Check a row that was rolled out through a GitHub Actions workflow.

    Prerequisites and flow:
    - REQUIRES MANUAL SETUP: the GitHub Actions workflow and its secrets
      must already be configured.
    - The rollout is triggered via ``GithubActionRolloutProcessor``.
    - Traces are fetched from the Fireworks tracing proxy (default
      ``FireworksTracingAdapter``).

    The test fails when the original prompt is not preserved, when no
    response was added to the row, or when the row has no rollout_id.
    """
    # The original user prompt must survive the rollout untouched.
    prompt = row.messages[0]
    assert prompt.content == "What is the capital of France?", "Row should have correct message content"

    # More than one message means a response was attached to the row.
    assert len(row.messages) > 1, "Row should have a response. If this fails, we fell back to the original row."

    rollout_id = row.execution_metadata.rollout_id
    assert rollout_id, "Row should have a rollout_id from the GitHub Actions rollout"

    return row
0 commit comments