Skip to content

chore: remove private KIE provider config #25

chore: remove private KIE provider config

chore: remove private KIE provider config #25

Workflow file for this run

name: Merge Queue Checks
run-name: Merge Queue Checks for ${{ github.ref }}

# How this workflow gets started:
#   - 'workflow_call': invoked by 'general.yml', so that it can depend on artifacts from other jobs.
#   - 'workflow_dispatch': manual runs.
#   - 'schedule': the daily cron run.
# There is deliberately no 'merge_group' trigger below. When 'general.yml' calls this workflow
# from the merge queue, 'github.event_name' is still reported as 'merge_group' (the caller's event),
# which is what the rest of this file keys off of.
on:
  workflow_dispatch:
  workflow_call:
  schedule:
    # Runs at 00:00 UTC every day
    - cron: "0 0 * * *"

# Merge queue runs share one concurrency group per workflow + ref, so a newer run cancels
# the older run for the same PR branch.
# Every other run uses its own unique run id as the group, so nothing ever gets cancelled.
concurrency:
  group: ${{ github.event_name == 'merge_group' && format('merge-queue-{0}-{1}', github.workflow, github.ref) || github.run_id }}
  cancel-in-progress: true
# Workflow-level environment, visible to every step of every job defined in this file.
env:
  # --- Provider credentials and endpoints (sourced from repository secrets) ---
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  AWS_BEARER_TOKEN_BEDROCK: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }}
  AWS_REGION: "us-east-1"
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  AZURE_AI_FOUNDRY_API_KEY: ${{ secrets.AZURE_AI_FOUNDRY_API_KEY }}
  AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
  AZURE_OPENAI_EASTUS2_API_KEY: ${{ secrets.AZURE_OPENAI_EASTUS2_API_KEY }}
  AZURE_OPENAI_DEPLOYMENT_ID: ${{ secrets.AZURE_OPENAI_DEPLOYMENT_ID }}
  DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }}
  FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }}
  FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
  GCP_STORAGE_ACCESS_KEY_ID: ${{ secrets.GCP_STORAGE_ACCESS_KEY_ID }}
  GCP_STORAGE_SECRET_ACCESS_KEY: ${{ secrets.GCP_STORAGE_SECRET_ACCESS_KEY }}
  # Both GCP credential variables point at the file created by the 'Write GCP JWT key to file' steps
  GCP_VERTEX_CREDENTIALS_PATH: ${{ github.workspace }}/gcp_jwt_key.json
  GOOGLE_AI_STUDIO_API_KEY: ${{ secrets.GOOGLE_AI_STUDIO_API_KEY }}
  GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/gcp_jwt_key.json
  GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
  HYPERBOLIC_API_KEY: ${{ secrets.HYPERBOLIC_API_KEY }}
  MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
  MODAL_KEY: ${{ secrets.MODAL_KEY }}
  MODAL_SECRET: ${{ secrets.MODAL_SECRET }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
  R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
  R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
  SGLANG_API_KEY: ${{ secrets.SGLANG_API_KEY }}
  TGI_API_KEY: ${{ secrets.TGI_API_KEY }}
  TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
  VLLM_API_BASE: ${{ secrets.VLLM_API_BASE }}
  VLLM_API_KEY: ${{ secrets.VLLM_API_KEY }}
  VLLM_MODEL_NAME: "microsoft/Phi-3.5-mini-instruct"
  VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
  XAI_API_KEY: ${{ secrets.XAI_API_KEY }}

  # --- Non-secret settings for the build and the test harness ---
  FORCE_COLOR: 1
  OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: https://localhost:4316
  SQLX_OFFLINE: 1
  TENSORZERO_E2E_PROXY: http://localhost:3003
  # Image tags are all derived from the commit being tested
  TENSORZERO_COMMIT_TAG: sha-${{ github.sha }}
  TENSORZERO_GATEWAY_TAG: sha-${{ github.sha }}
  TENSORZERO_MOCK_PROVIDER_API_TAG: sha-${{ github.sha }}
  TENSORZERO_CI: 1
  # Nextest filter expression for flaky tests to skip (e.g. "test(hyperbolic)|test(tgi)")
  CARGO_NEXTEST_FLAKY_TESTS: ${{ vars.CARGO_NEXTEST_FLAKY_TESTS }}
jobs:
# Build containers when triggered directly (not from general.yml which already builds them)
# When called from general.yml, github.event_name is 'merge_group' so this is skipped
build-gateway-e2e-container:
  # Delegates to the reusable workflow that builds the gateway-e2e image.
  # Only runs for manual and scheduled runs of this workflow.
  uses: ./.github/workflows/build-gateway-e2e-container.yml
  if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule')
  permissions:
    contents: read
    id-token: write
  secrets:
    DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
    DOCKERHUB_LIMITED_TOKEN: ${{ secrets.DOCKERHUB_LIMITED_TOKEN }}
build-gateway-container:
  # Delegates to the reusable workflow that builds the gateway image.
  # Only runs for manual and scheduled runs of this workflow.
  uses: ./.github/workflows/build-gateway-container.yml
  if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule')
  permissions:
    contents: read
    id-token: write
build-mock-provider-api-container:
  # Delegates to the reusable workflow that builds the mock-provider-api image.
  # Only runs for manual and scheduled runs of this workflow.
  uses: ./.github/workflows/build-mock-provider-api-container.yml
  if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule')
  permissions:
    contents: read
    id-token: write
build-fixtures-container:
  # Delegates to the reusable workflow that builds the fixtures image.
  # Only runs for manual and scheduled runs of this workflow.
  uses: ./.github/workflows/build-fixtures-container.yml
  if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule')
  permissions:
    contents: read
    id-token: write
  secrets:
    DOCKERHUB_LIMITED_TOKEN: ${{ secrets.DOCKERHUB_LIMITED_TOKEN }}
build-provider-proxy-container:
  # Delegates to the reusable workflow that builds the provider-proxy image.
  # Only runs for manual and scheduled runs of this workflow.
  uses: ./.github/workflows/build-provider-proxy-container.yml
  if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule')
  permissions:
    contents: read
    id-token: write
  secrets:
    DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
    DOCKERHUB_LIMITED_TOKEN: ${{ secrets.DOCKERHUB_LIMITED_TOKEN }}
build-live-tests-container:
  # Delegates to the reusable workflow that builds the live-tests image.
  # Only runs for manual and scheduled runs of this workflow.
  uses: ./.github/workflows/build-live-tests-container.yml
  if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule')
  permissions:
    contents: read
    id-token: write
  secrets:
    DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
    DOCKERHUB_LIMITED_TOKEN: ${{ secrets.DOCKERHUB_LIMITED_TOKEN }}
live-tests:
  # Runs the live-tests container (via Docker Compose) against the images produced by the build jobs.
  name: "live-tests (batch_writes: ${{ matrix.batch_writes }})"
  needs:
    - build-gateway-e2e-container
    - build-gateway-container
    - build-fixtures-container
    - build-provider-proxy-container
    - build-live-tests-container
  # Run even when build-gateway-e2e-container is skipped (merge_group case)
  # NOTE(review): an unresolved merge conflict at this spot also carried
  # `if: github.repository == 'RouterBase/tensorzero'` (from f413826b9, "chore: build docker images").
  # A job can only have one `if`, so the guard below (which matches the build jobs this job needs)
  # was kept - confirm which repository this job should be gated on.
  if: always() && !failure() && !cancelled() && github.repository == 'tensorzero/tensorzero'
  runs-on: ubuntu-latest
  permissions:
    # Permission to checkout the repository
    contents: read
    # Permission to download artifacts
    actions: read
  timeout-minutes: 45
  strategy:
    # NOTE(review): no step in this job reads `matrix.batch_writes` (only the job name does) -
    # confirm that the compose file / live-tests container picks the setting up some other way.
    matrix:
      batch_writes: [true, false]
    # Don't fail-fast for manual/cron runs, so that we get the full picture of what broke
    fail-fast: ${{ github.event_name == 'merge_group' }}
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

    - name: Cleanup disk space
      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be

    # Each build job publishes its image as an artifact; fetch them all, then `docker load` them.
    - name: Download gateway container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-gateway-container

    - name: Download provider-proxy container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-provider-proxy-container

    - name: Download gateway-e2e container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-gateway-e2e-container

    - name: Download live-tests container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-live-tests-container

    - name: Download fixtures container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-fixtures-container

    - name: Load container images
      run: |
        docker load < gateway-container.tar
        docker load < provider-proxy-container.tar
        docker load < gateway-e2e-container.tar
        docker load < live-tests-container.tar
        docker load < fixtures-container.tar

    - name: Restore provider-proxy cache
      if: github.event_name != 'schedule'
      uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684
      with:
        path: ./ci/provider-proxy-cache/
        key: provider-proxy-cache-${{ github.run_id }}
        restore-keys: provider-proxy-cache-

    - name: Download provider-proxy cache
      # When running as a cron job, don't use the provider-proxy cache.
      # The cron job is used to gather information about provider flakiness.
      if: github.event_name != 'schedule'
      run: |
        AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY PROVIDER_PROXY_CACHE_BUCKET=provider-proxy-cache ./ci/download-provider-proxy-cache.sh

    - name: Login to DockerHub
      uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Write GCP JWT key to file
      env:
        GCP_JWT_KEY: ${{ secrets.GCP_JWT_KEY }}
      # The target path is quoted so that a workspace path containing spaces cannot split the redirect
      run: echo "$GCP_JWT_KEY" > "$GITHUB_WORKSPACE/gcp_jwt_key.json"

    - name: Pull images referenced by the compose file
      run: docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml --profile provider-proxy pull --ignore-pull-failures

    - name: Run live tests container via Docker Compose
      run: |
        DOCKER_UID=$(id -u) DOCKER_GID=$(id -g) docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml --profile provider-proxy run --rm -e TENSORZERO_CI=1 -e TENSORZERO_FF_WRITE_CONFIG_SNAPSHOT=1 live-tests

    - name: Print live tests logs
      if: always()
      run: docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml --profile provider-proxy logs -t

    - name: Check e2e logs for impossible error messages
      run: |
        LOGS=$(docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml --profile provider-proxy logs gateway)
        if [ -z "$LOGS" ]; then
          echo "ERROR: Gateway logs are empty"
          exit 1
        fi
        # Fail when the marker shows up anywhere in the logs (grep prints the offending lines).
        # `grep --invert-match` cannot be used for this: it exits 0 as soon as any line does
        # NOT match, so it would pass even when the marker is present.
        if grep -i "please file a bug report" <<< "$LOGS"; then
          echo "ERROR: Gateway logs contain an error message that should be impossible"
          exit 1
        fi

    # # TODO(https://github.com/tensorzero/tensorzero/issues/3989) - move this back to the end of the job
    # # For now, we only check for deprecation warnings after running the Rust e2e tests
    # TODO - re-enable this: https://github.com/tensorzero/tensorzero/issues/3989
    # - name: Check e2e logs for deprecation warnings (gateway e2e tests only)
    #   run: |
    #     LOGS=$(docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml logs gateway)
    #     if [ -z "$LOGS" ]; then
    #       echo "ERROR: Gateway logs are empty"
    #       exit 1
    #     fi
    #     ! grep -i "Deprecation Warning" <<< "$LOGS"

    - name: Upload provider-proxy cache
      # Only upload the cache when we're running from a 'good' run
      # (running from the merge queue via `general.yml` or a cron job)
      # This prevents manual workflow runs from modifying the cache
      if: github.event_name == 'merge_group' || github.event_name == 'schedule'
      run: |
        AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY PROVIDER_PROXY_CACHE_BUCKET=provider-proxy-cache ./ci/upload-provider-proxy-cache.sh
client-tests:
  # Runs the client test suites (Python, Node.js, Go, recipes) against a gateway-e2e container
  # started from the images produced by the build jobs.
  name: "client-tests (batch_writes: ${{ matrix.batch_writes }})"
  needs:
    - build-gateway-e2e-container
    - build-gateway-container
    - build-mock-provider-api-container
    - build-fixtures-container
    - build-provider-proxy-container
  # Run even when build jobs are skipped (merge_group case where general.yml already built them)
  # NOTE(review): an unresolved merge conflict at this spot also carried
  # `if: github.repository == 'RouterBase/tensorzero'` (from f413826b9, "chore: build docker images").
  # A job can only have one `if`, so the guard below (which matches the build jobs this job needs)
  # was kept - confirm which repository this job should be gated on.
  if: always() && !failure() && !cancelled() && github.repository == 'tensorzero/tensorzero'
  runs-on: ubuntu-latest
  permissions:
    # Permission to checkout the repository
    contents: read
    # Permission to download artifacts
    actions: read
  timeout-minutes: 45
  strategy:
    matrix:
      batch_writes: [true, false]
    # Don't fail-fast for manual/cron runs, so that we get the full picture of what broke
    fail-fast: ${{ github.event_name == 'merge_group' }}
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

    # gdb is used by the PyO3 pytest step below to dump backtraces if the tests hang
    - name: Install gdb
      run: sudo apt-get update && sudo apt-get install -y gdb

    - name: Warm up Modal instances
      # The requests run in the background; their output is printed by the 'Print ... modal logs' steps
      run: |
        curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--vllm-inference-vllm-inference.modal.run/docs > vllm_modal_logs.txt &
        curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--sglang-0-4-10-inference-sglang-inference.modal.run/ > sglang_modal_logs.txt &
        # TODO: Re-enable once we can switch to a T4 GPU
        # curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--vllm-gpt-oss-20b-serve.modal.run/ > vllm_gpt_oss_modal_logs.txt &

    - name: Cleanup disk space
      uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be

    - name: Restore client-tests provider-proxy cache
      if: github.event_name != 'schedule'
      uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684
      with:
        path: ./ci/provider-proxy-cache/
        key: provider-proxy-cache-client-tests-${{ github.run_id }}
        restore-keys: provider-proxy-cache-client-tests-

    - name: Download client-tests provider-proxy cache
      # When running as a cron job, don't use the provider-proxy cache.
      # The cron job is used to gather information about provider flakiness.
      if: github.event_name != 'schedule'
      run: |
        AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY PROVIDER_PROXY_CACHE_BUCKET=provider-proxy-cache-client-tests ./ci/download-provider-proxy-cache.sh

    - name: Update Rust
      # Up to 3 attempts, sleeping 10s and then 20s between them
      run: |
        for attempt in 1 2 3; do
          if rustup update stable && rustup default stable; then
            break
          fi
          if [ $attempt -eq 3 ]; then
            echo "Failed to update Rust after 3 attempts"
            exit 1
          fi
          sleep $((10 * attempt))
        done
      shell: bash

    # Each build job publishes its image as an artifact; fetch them all, then `docker load` them.
    - name: Download gateway container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-gateway-container

    - name: Download mock-provider-api container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-mock-provider-api-container

    - name: Download fixtures container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-fixtures-container

    - name: Download gateway-e2e container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-gateway-e2e-container

    - name: Download provider-proxy container image
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53
      with:
        name: build-provider-proxy-container

    - name: Load container images
      run: |
        docker load < gateway-container.tar
        docker load < mock-provider-api-container.tar
        docker load < fixtures-container.tar
        docker load < gateway-e2e-container.tar
        docker load < provider-proxy-container.tar

    - name: Install Rust toolchain
      # Up to 3 attempts, sleeping 10s and then 20s between them
      run: |
        for attempt in 1 2 3; do
          if rustup toolchain install stable && rustup default stable; then
            break
          fi
          if [ $attempt -eq 3 ]; then
            echo "Failed to install Rust toolchain after 3 attempts"
            exit 1
          fi
          sleep $((10 * attempt))
        done
      shell: bash

    # Start testing workload identity federation credentials once the SDK adds support: https://github.com/googleapis/google-cloud-rust/issues/1342
    # - uses: 'google-github-actions/auth@v2'
    #   with:
    #     project_id: 'tensozero-public'
    #     workload_identity_provider: 'projects/454541351720/locations/global/workloadIdentityPools/github/providers/tensorzero'

    - name: Print Rust version
      run: rustc --version

    - name: Install uv
      uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b
      with:
        version: "0.9.27"

    - name: Install pnpm
      # Up to 3 attempts, sleeping 10s and then 20s between them
      run: |
        for attempt in 1 2 3; do
          if npm install -g pnpm@latest; then
            break
          fi
          if [ $attempt -eq 3 ]; then
            echo "Failed to install pnpm after 3 attempts"
            exit 1
          fi
          sleep $((10 * attempt))
        done
      shell: bash

    - name: Install JS dependencies
      run: pnpm install --frozen-lockfile

    - name: Login to DockerHub
      uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Install cargo-nextest
      uses: taiki-e/install-action@60581cd7025e0e855cebd745379013e286d9c787
      with:
        tool: cargo-nextest

    - name: Write GCP JWT key to file
      env:
        GCP_JWT_KEY: ${{ secrets.GCP_JWT_KEY }}
      # The target path is quoted so that a workspace path containing spaces cannot split the redirect
      run: echo "$GCP_JWT_KEY" > "$GITHUB_WORKSPACE/gcp_jwt_key.json"

    - name: Set up database URLs for E2E tests
      # Values appended to $GITHUB_ENV become environment variables for all later steps
      run: |
        echo "TENSORZERO_CLICKHOUSE_URL=http://chuser:chpassword@localhost:8123/tensorzero_e2e_tests" >> $GITHUB_ENV
        echo "TENSORZERO_CLICKHOUSE_BATCH_WRITES=${{ matrix.batch_writes }}" >> $GITHUB_ENV
        echo "DATABASE_URL=postgresql://postgres:postgres@localhost:5432/tensorzero-e2e-tests" >> $GITHUB_ENV
        echo "TENSORZERO_POSTGRES_URL=postgresql://postgres:postgres@localhost:5432/tensorzero-e2e-tests" >> $GITHUB_ENV
        echo "TENSORZERO_VALKEY_URL=redis://localhost:6379" >> $GITHUB_ENV
        echo "TENSORZERO_SKIP_LARGE_FIXTURES=1" >> $GITHUB_ENV

    - name: Configure batch writes in tensorzero.toml
      if: matrix.batch_writes == true
      run: |
        echo "[gateway.observability.batch_writes]" >> tensorzero-core/tests/e2e/config/tensorzero.misc.toml
        echo "enabled = true" >> tensorzero-core/tests/e2e/config/tensorzero.misc.toml
        echo "flush_interval_ms = 80" >> tensorzero-core/tests/e2e/config/tensorzero.misc.toml
        echo "__force_allow_embedded_batch_writes = true" >> tensorzero-core/tests/e2e/config/tensorzero.misc.toml

    - name: Launch dependency services for E2E tests
      run: |
        DOCKER_UID=$(id -u) DOCKER_GID=$(id -g) docker compose -f tensorzero-core/tests/e2e/docker-compose.yml --profile provider-proxy up --no-build -d --wait

    - name: Print ClickHouse container logs
      if: always()
      run: |
        docker compose -f tensorzero-core/tests/e2e/docker-compose.yml --profile provider-proxy logs -t

    - name: Set up gateway environment file
      # gateway.env is passed to the gateway containers below via `--env-file`
      run: |
        # TensorZero config
        echo "TENSORZERO_CLICKHOUSE_URL=${TENSORZERO_CLICKHOUSE_URL}" >> gateway.env
        echo "TENSORZERO_CLICKHOUSE_BATCH_WRITES=${TENSORZERO_CLICKHOUSE_BATCH_WRITES}" >> gateway.env
        echo "TENSORZERO_POSTGRES_URL=${TENSORZERO_POSTGRES_URL}" >> gateway.env
        echo "TENSORZERO_VALKEY_URL=${TENSORZERO_VALKEY_URL}" >> gateway.env
        echo "TENSORZERO_E2E_PROXY=${TENSORZERO_E2E_PROXY}" >> gateway.env
        echo "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT}" >> gateway.env
        echo "GCP_VERTEX_CREDENTIALS_PATH=/app/gcp_jwt_key.json" >> gateway.env
        echo "GOOGLE_APPLICATION_CREDENTIALS=/app/gcp_jwt_key.json" >> gateway.env
        # Provider API keys
        echo "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}" >> gateway.env
        echo "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}" >> gateway.env
        echo "AWS_BEARER_TOKEN_BEDROCK=${AWS_BEARER_TOKEN_BEDROCK}" >> gateway.env
        echo "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}" >> gateway.env
        echo "AWS_REGION=${AWS_REGION}" >> gateway.env
        echo "AZURE_API_KEY=${AZURE_API_KEY}" >> gateway.env
        echo "AZURE_OPENAI_EASTUS2_API_KEY=${AZURE_OPENAI_EASTUS2_API_KEY}" >> gateway.env
        echo "AZURE_OPENAI_DEPLOYMENT_ID=${AZURE_OPENAI_DEPLOYMENT_ID}" >> gateway.env
        echo "AZURE_AI_FOUNDRY_API_KEY=${AZURE_AI_FOUNDRY_API_KEY}" >> gateway.env
        echo "DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY}" >> gateway.env
        echo "FIREWORKS_API_KEY=${FIREWORKS_API_KEY}" >> gateway.env
        echo "FIREWORKS_ACCOUNT_ID=${FIREWORKS_ACCOUNT_ID}" >> gateway.env
        echo "GCP_STORAGE_ACCESS_KEY_ID=${GCP_STORAGE_ACCESS_KEY_ID}" >> gateway.env
        echo "GCP_STORAGE_SECRET_ACCESS_KEY=${GCP_STORAGE_SECRET_ACCESS_KEY}" >> gateway.env
        echo "GOOGLE_AI_STUDIO_API_KEY=${GOOGLE_AI_STUDIO_API_KEY}" >> gateway.env
        echo "GROQ_API_KEY=${GROQ_API_KEY}" >> gateway.env
        echo "HYPERBOLIC_API_KEY=${HYPERBOLIC_API_KEY}" >> gateway.env
        echo "MISTRAL_API_KEY=${MISTRAL_API_KEY}" >> gateway.env
        echo "MODAL_KEY=${MODAL_KEY}" >> gateway.env
        echo "MODAL_SECRET=${MODAL_SECRET}" >> gateway.env
        echo "OPENAI_API_KEY=${OPENAI_API_KEY}" >> gateway.env
        echo "OPENROUTER_API_KEY=${OPENROUTER_API_KEY}" >> gateway.env
        echo "SGLANG_API_KEY=${SGLANG_API_KEY}" >> gateway.env
        echo "TGI_API_KEY=${TGI_API_KEY}" >> gateway.env
        echo "TOGETHER_API_KEY=${TOGETHER_API_KEY}" >> gateway.env
        echo "VLLM_API_KEY=${VLLM_API_KEY}" >> gateway.env
        echo "VOYAGE_API_KEY=${VOYAGE_API_KEY}" >> gateway.env
        echo "XAI_API_KEY=${XAI_API_KEY}" >> gateway.env

    - name: Run postgres migrations
      run: |
        docker run --rm --network host \
          --env-file gateway.env \
          tensorzero/gateway-e2e:sha-${{ github.sha }} \
          --run-postgres-migrations

    - name: Launch the gateway for E2E tests
      # The health-check loop below has no exit condition of its own; this timeout bounds it
      timeout-minutes: 2
      run: |
        docker run -d --name gateway-e2e --network host \
          --env-file gateway.env \
          -v ${{ github.workspace }}/gcp_jwt_key.json:/app/gcp_jwt_key.json:ro \
          -v ${{ github.workspace }}/tensorzero-core:/app/tensorzero-core:ro \
          -v ${{ github.workspace }}/ui/fixtures:/app/ui/fixtures:ro \
          tensorzero/gateway-e2e:sha-${{ github.sha }} \
          --config-file '/app/tensorzero-core/tests/e2e/config/tensorzero.*.toml'
        while ! curl -s -f http://localhost:3000/health >/dev/null 2>&1; do
          echo "Waiting for gateway to be healthy..."
          sleep 1
        done

    - name: Install Python for python async client tests
      run: uv python install 3.9

    - name: "Python: PyO3 Client: pytest (non-mock tests)"
      working-directory: clients/python
      run: |
        # Start the test in background and capture its PID
        bash ./test.sh --verbose -n 8 -m "not mock" &
        TEST_PID=$!
        echo "Started test.sh with PID: $TEST_PID"
        # Wait for 10 minutes (600 seconds)
        for i in {1..600}; do
          if ! kill -0 $TEST_PID 2>/dev/null; then
            echo "Test completed normally"
            wait $TEST_PID
            exit $?
          fi
          sleep 1
        done
        echo "Test has been running for 10 minutes, capturing backtraces..."
        # Get all processes related to our test
        echo "=== Process tree ==="
        ps -ef | grep -E "(test\.sh|pytest|python)" | grep -v grep || true
        echo "=== Capturing backtraces with gdb ==="
        # Find all python processes that might be related to our test
        PYTHON_PIDS=$(pgrep -f "tensorzero.*python" || true)
        if [ -n "$PYTHON_PIDS" ]; then
          for pid in $PYTHON_PIDS; do
            echo "--- Backtrace for Python process $pid ---"
            gdb -p $pid --batch \
              -ex "set pagination off" \
              -ex "thread apply all bt" \
              -ex "info threads" \
              -ex "detach" \
              -ex "quit" 2>&1 || true
            echo ""
          done
        else
          echo "No Python processes found"
        fi
        exit 1

    - name: "Python: PyO3 Client: pytest (mock tests)"
      working-directory: clients/python
      env:
        TENSORZERO_INTERNAL_MOCK_PROVIDER_API: http://localhost:3030
      run: |
        bash ./test.sh --verbose -n 8 -m mock

    - name: "Node.js: OpenAI Client: test"
      working-directory: clients/openai-node
      run: |
        pnpm run test

    - name: Install Go
      uses: actions/setup-go@29694d72cd5e7ef3b09496b39f28a942af47737e
      with:
        go-version: "1.24"

    - name: "Go: OpenAI Client: test"
      working-directory: clients/openai-go/tests
      run: go test -v

    - name: "Python: Recipes: pytest"
      working-directory: recipes
      run: |
        uv run pytest

    - name: Print e2e logs
      if: always()
      run: docker logs gateway-e2e 2>&1 || echo "No gateway container logs available"

    - name: Check e2e logs for impossible error messages
      run: |
        LOGS=$(docker logs gateway-e2e 2>&1)
        if [ -z "$LOGS" ]; then
          echo "ERROR: Gateway logs are empty"
          exit 1
        fi
        # Fail when the marker shows up anywhere in the logs (grep prints the offending lines).
        # `grep --invert-match` cannot be used for this: it exits 0 as soon as any line does
        # NOT match, so it would pass even when the marker is present.
        if grep -i "please file a bug report" <<< "$LOGS"; then
          echo "ERROR: Gateway logs contain an error message that should be impossible"
          exit 1
        fi

    - name: Terminate the gateway container
      if: always()
      run: |
        docker stop gateway-e2e || true
        docker rm gateway-e2e || true

    - name: Print provider-proxy logs
      if: always()
      run: docker compose -f tensorzero-core/tests/e2e/docker-compose.yml --profile provider-proxy logs provider-proxy 2>&1 || echo "No provider-proxy logs available"

    - name: Print vLLM modal logs
      if: always()
      run: cat vllm_modal_logs.txt

    - name: Print SGLang modal logs
      if: always()
      run: cat sglang_modal_logs.txt

    - name: Print vLLM GPT-OSS modal logs
      if: always()
      # The warm-up request that writes this file is currently commented out, so `cat` is allowed to fail
      continue-on-error: true
      run: cat vllm_gpt_oss_modal_logs.txt

    - name: Upload client-tests provider-proxy cache
      # Only upload the cache when we're running from a 'good' run
      # (running from the merge queue via 'workflow_call' from general.yml, or a cron job)
      # This prevents manual workflow runs from modifying the cache
      if: github.event_name == 'merge_group' || github.event_name == 'schedule'
      run: |
        AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY PROVIDER_PROXY_CACHE_BUCKET=provider-proxy-cache-client-tests ./ci/upload-provider-proxy-cache.sh
# See 'ci/README.md' at the repository root for more details.
check-all-tests-passed:
  # Aggregation job: turns the results of the test jobs it `needs` into a single pass/fail status.
  # NOTE(review): this guard names 'RouterBase/tensorzero', while the guards on 'live-tests',
  # 'client-tests' and the build jobs name 'tensorzero/tensorzero'. As written, in either
  # repository this job is either skipped itself or sees its needed jobs skipped - confirm which
  # repository is intended.
  if: always() && github.repository == 'RouterBase/tensorzero'
  needs:
    - client-tests
    - live-tests
  runs-on: ubuntu-latest
  permissions: {}
  steps:
    # Fail when any needed job failed or was cancelled.
    # A skipped job also counts as a failure, except when the event is 'pull_request'.
    - if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || (github.event_name != 'pull_request' && contains(needs.*.result, 'skipped')) }}
      run: exit 1

    # Post to Slack on failure for scheduled (cron) runs only.
    # Scheduled runs bypass the provider-proxy cache to detect provider flakiness.
    # See: https://github.com/tensorzero/tensorzero/issues/5380
    - name: Post to Slack on failure (scheduled runs only)
      if: failure() && github.event_name == 'schedule'
      uses: slackapi/slack-github-action@v2.1.1
      with:
        method: chat.postMessage
        token: ${{ secrets.SLACK_BOT_TOKEN }}
        payload: |
          channel: C09DM0RGDFG
          text: "Live provider tests failed (scheduled run, no cache): <https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Run>"