From b86b2dadd538fbd191a3a1a90812dd5d3955d029 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Thu, 21 Aug 2025 10:02:46 -0700 Subject: [PATCH] test smoke test --- .github/workflows/e2e-smoke-test.yml | 1 + tests/test_tau_bench_airline_smoke.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e-smoke-test.yml b/.github/workflows/e2e-smoke-test.yml index 2d160ac1..802562a2 100644 --- a/.github/workflows/e2e-smoke-test.yml +++ b/.github/workflows/e2e-smoke-test.yml @@ -11,6 +11,7 @@ on: required: false default: false type: boolean + push: jobs: e2e-smoke-test: diff --git a/tests/test_tau_bench_airline_smoke.py b/tests/test_tau_bench_airline_smoke.py index 044447b7..608d5c4b 100644 --- a/tests/test_tau_bench_airline_smoke.py +++ b/tests/test_tau_bench_airline_smoke.py @@ -74,7 +74,7 @@ def tau_bench_airline_smoke_to_evaluation_row(data: List[Dict[str, Any]]) -> Lis ], rollout_processor=MCPGymRolloutProcessor(), passed_threshold=0.36, - num_runs=1, # Smoke test: single run for quick feedback + num_runs=8, # Smoke test: 8 runs (previously a single run for quick feedback) mode="pointwise", max_concurrent_rollouts=50, # Standard concurrency server_script_path="examples/tau2_mcp/server.py",