sora2: translate image_urls → image (i2v) and resolution → size (pro … #30
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Merge Queue Checks | ||
| run-name: Merge Queue Checks for ${{ github.ref }} | ||
| # This workflow is called from 'general.yml' (so that it can depend on artifacts from other jobs) | ||
| # It is *not* triggered directly by a 'merge_group' event; it inherits 'github.event_name' from the calling workflow, so the value is 'merge_group' only when 'general.yml' calls it from the merge queue | ||
| on: | ||
| workflow_dispatch: | ||
| workflow_call: | ||
| schedule: | ||
| - cron: "0 0 * * *" # Runs at 00:00 UTC every day | ||
| # When triggered from the merge queue, cancel any existing workflow runs for the same PR branch | ||
| # Otherwise, use the unique run id for the concurrency group, to prevent anything from getting cancelled | ||
| # Note that the event will be 'merge_group' when general.yml calls this workflow via a 'workflow_call' event | ||
| concurrency: | ||
| group: ${{ github.event_name == 'merge_group' && format('merge-queue-{0}-{1}', github.workflow, github.ref) || github.run_id }} | ||
| cancel-in-progress: true | ||
| env: | ||
| ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} | ||
| AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} | ||
| AWS_BEARER_TOKEN_BEDROCK: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }} | ||
| AWS_REGION: "us-east-1" | ||
| AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} | ||
| AZURE_AI_FOUNDRY_API_KEY: ${{ secrets.AZURE_AI_FOUNDRY_API_KEY }} | ||
| AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} | ||
| AZURE_OPENAI_EASTUS2_API_KEY: ${{ secrets.AZURE_OPENAI_EASTUS2_API_KEY }} | ||
| AZURE_OPENAI_DEPLOYMENT_ID: ${{secrets.AZURE_OPENAI_DEPLOYMENT_ID }} | ||
| DEEPSEEK_API_KEY: ${{ secrets.DEEPSEEK_API_KEY }} | ||
| FIREWORKS_ACCOUNT_ID: ${{ secrets.FIREWORKS_ACCOUNT_ID }} | ||
| FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }} | ||
| FORCE_COLOR: 1 | ||
| GCP_STORAGE_ACCESS_KEY_ID: ${{ secrets.GCP_STORAGE_ACCESS_KEY_ID }} | ||
| GCP_STORAGE_SECRET_ACCESS_KEY: ${{ secrets.GCP_STORAGE_SECRET_ACCESS_KEY }} | ||
| GCP_VERTEX_CREDENTIALS_PATH: ${{ github.workspace }}/gcp_jwt_key.json | ||
| GOOGLE_AI_STUDIO_API_KEY: ${{ secrets.GOOGLE_AI_STUDIO_API_KEY }} | ||
| GOOGLE_APPLICATION_CREDENTIALS: ${{ github.workspace }}/gcp_jwt_key.json | ||
| GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} | ||
| HYPERBOLIC_API_KEY: ${{secrets.HYPERBOLIC_API_KEY}} | ||
| MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} | ||
| MODAL_KEY: ${{ secrets.MODAL_KEY }} | ||
| MODAL_SECRET: ${{ secrets.MODAL_SECRET }} | ||
| OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} | ||
| OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} | ||
| R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }} | ||
| R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }} | ||
| SGLANG_API_KEY: ${{ secrets.SGLANG_API_KEY }} | ||
| TGI_API_KEY: ${{ secrets.TGI_API_KEY }} | ||
| TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }} | ||
| VLLM_API_BASE: ${{ secrets.VLLM_API_BASE }} | ||
| VLLM_API_KEY: ${{ secrets.VLLM_API_KEY }} | ||
| VLLM_MODEL_NAME: "microsoft/Phi-3.5-mini-instruct" | ||
| VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }} | ||
| XAI_API_KEY: ${{ secrets.XAI_API_KEY }} | ||
| OTEL_EXPORTER_OTLP_TRACES_ENDPOINT: https://localhost:4316 | ||
| SQLX_OFFLINE: 1 | ||
| TENSORZERO_E2E_PROXY: http://localhost:3003 | ||
| TENSORZERO_COMMIT_TAG: sha-${{ github.sha }} | ||
| TENSORZERO_GATEWAY_TAG: sha-${{ github.sha }} | ||
| TENSORZERO_MOCK_PROVIDER_API_TAG: sha-${{ github.sha }} | ||
| TENSORZERO_CI: 1 | ||
| # Nextest filter expression for flaky tests to skip (e.g. "test(hyperbolic)|test(tgi)") | ||
| CARGO_NEXTEST_FLAKY_TESTS: ${{ vars.CARGO_NEXTEST_FLAKY_TESTS }} | ||
| jobs: | ||
| # Build containers when triggered directly (not from general.yml which already builds them) | ||
| # When called from general.yml, github.event_name is 'merge_group' so this is skipped | ||
| build-gateway-e2e-container: | ||
| if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') | ||
| uses: ./.github/workflows/build-gateway-e2e-container.yml | ||
| permissions: | ||
| contents: read | ||
| id-token: write | ||
| secrets: | ||
| DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} | ||
| DOCKERHUB_LIMITED_TOKEN: ${{ secrets.DOCKERHUB_LIMITED_TOKEN }} | ||
| build-gateway-container: | ||
| if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') | ||
| uses: ./.github/workflows/build-gateway-container.yml | ||
| permissions: | ||
| contents: read | ||
| id-token: write | ||
| build-mock-provider-api-container: | ||
| if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') | ||
| uses: ./.github/workflows/build-mock-provider-api-container.yml | ||
| permissions: | ||
| contents: read | ||
| id-token: write | ||
| build-fixtures-container: | ||
| if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') | ||
| uses: ./.github/workflows/build-fixtures-container.yml | ||
| permissions: | ||
| contents: read | ||
| id-token: write | ||
| secrets: | ||
| DOCKERHUB_LIMITED_TOKEN: ${{ secrets.DOCKERHUB_LIMITED_TOKEN }} | ||
| build-provider-proxy-container: | ||
| if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') | ||
| uses: ./.github/workflows/build-provider-proxy-container.yml | ||
| permissions: | ||
| contents: read | ||
| id-token: write | ||
| secrets: | ||
| DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} | ||
| DOCKERHUB_LIMITED_TOKEN: ${{ secrets.DOCKERHUB_LIMITED_TOKEN }} | ||
| build-live-tests-container: | ||
| if: github.repository == 'tensorzero/tensorzero' && (github.event_name == 'workflow_dispatch' || github.event_name == 'schedule') | ||
| uses: ./.github/workflows/build-live-tests-container.yml | ||
| permissions: | ||
| contents: read | ||
| id-token: write | ||
| secrets: | ||
| DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} | ||
| DOCKERHUB_LIMITED_TOKEN: ${{ secrets.DOCKERHUB_LIMITED_TOKEN }} | ||
| live-tests: | ||
| needs: | ||
| [ | ||
| build-gateway-e2e-container, | ||
| build-gateway-container, | ||
| build-fixtures-container, | ||
| build-provider-proxy-container, | ||
| build-live-tests-container, | ||
| ] | ||
| # The build jobs above are skipped in the merge_group case, so 'always()' is needed for this job to run at all; | ||
| # '!failure() && !cancelled()' still prevents it from running after a failed or cancelled build. | ||
| # A job may have only one 'if:' key, so the repository check is part of this same expression. | ||
| if: always() && !failure() && !cancelled() && github.repository == 'tensorzero/tensorzero' | ||
| name: "live-tests (batch_writes: ${{ matrix.batch_writes }})" | ||
| runs-on: ubuntu-latest | ||
| permissions: | ||
| # Permission to checkout the repository | ||
| contents: read | ||
| # Permission to download artifacts | ||
| actions: read | ||
| timeout-minutes: 45 | ||
| strategy: | ||
| matrix: | ||
| # NOTE(review): no step of this job visibly reads 'matrix.batch_writes' (only the job name does) - confirm the two legs actually differ | ||
| batch_writes: [true, false] | ||
| # Don't fail-fast for manual/cron runs, so that we get the full picture of what broke | ||
| fail-fast: ${{ github.event_name == 'merge_group' }} | ||
| steps: | ||
| - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 | ||
| - name: Cleanup disk space | ||
| uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be | ||
| - name: Download gateway container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-gateway-container | ||
| - name: Download provider-proxy container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-provider-proxy-container | ||
| - name: Download gateway-e2e container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-gateway-e2e-container | ||
| - name: Download live-tests container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-live-tests-container | ||
| - name: Download fixtures container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-fixtures-container | ||
| - name: Load container images | ||
| run: | | ||
| docker load < gateway-container.tar | ||
| docker load < provider-proxy-container.tar | ||
| docker load < gateway-e2e-container.tar | ||
| docker load < live-tests-container.tar | ||
| docker load < fixtures-container.tar | ||
| - name: Restore provider-proxy cache | ||
| if: github.event_name != 'schedule' | ||
| uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 | ||
| with: | ||
| path: ./ci/provider-proxy-cache/ | ||
| key: provider-proxy-cache-${{ github.run_id }} | ||
| restore-keys: provider-proxy-cache- | ||
| - name: Download provider-proxy cache | ||
| # When running as a cron job, don't use the provider-proxy cache. | ||
| # The cron job is used to gather information about provider flakiness. | ||
| if: github.event_name != 'schedule' | ||
| run: | | ||
| AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY PROVIDER_PROXY_CACHE_BUCKET=provider-proxy-cache ./ci/download-provider-proxy-cache.sh | ||
| - name: Login to DockerHub | ||
| uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 | ||
| with: | ||
| username: ${{ secrets.DOCKERHUB_USERNAME }} | ||
| password: ${{ secrets.DOCKERHUB_TOKEN }} | ||
| - name: Write GCP JWT key to file | ||
| env: | ||
| GCP_JWT_KEY: ${{ secrets.GCP_JWT_KEY }} | ||
| run: echo "$GCP_JWT_KEY" > $GITHUB_WORKSPACE/gcp_jwt_key.json | ||
| - name: Pull images referenced by the compose file | ||
| run: docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml --profile provider-proxy pull --ignore-pull-failures | ||
| - name: Run live tests container via Docker Compose | ||
| run: | | ||
| DOCKER_UID=$(id -u) DOCKER_GID=$(id -g) docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml --profile provider-proxy run --rm -e TENSORZERO_CI=1 -e TENSORZERO_FF_WRITE_CONFIG_SNAPSHOT=1 live-tests | ||
| - name: Print live tests logs | ||
| if: always() | ||
| run: docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml --profile provider-proxy logs -t | ||
| - name: Check e2e logs for impossible error messages | ||
| run: | | ||
| LOGS=$(docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml --profile provider-proxy logs gateway) | ||
| # Empty logs mean the gateway never ran or logging broke, so the check below would pass vacuously | ||
| if [ -z "$LOGS" ]; then | ||
| echo "ERROR: Gateway logs are empty" | ||
| exit 1 | ||
| fi | ||
| # Fail when the marker IS present; grep prints the offending lines. | ||
| # ('grep --invert-match' exits 0 as soon as any line lacks the marker, so it could never fail here.) | ||
| if grep -i "please file a bug report" <<< "$LOGS"; then | ||
| echo "ERROR: Gateway logs contain an impossible error message" | ||
| exit 1 | ||
| fi | ||
| # # TODO(https://github.com/tensorzero/tensorzero/issues/3989) - move this back to the end of the job | ||
| # # For now, we only check for deprecation warnings after running the Rust e2e tests | ||
| # TODO - re-enable this: https://github.com/tensorzero/tensorzero/issues/3989 | ||
| # - name: Check e2e logs for deprecation warnings (gateway e2e tests only) | ||
| # run: | | ||
| # LOGS=$(docker compose -f tensorzero-core/tests/e2e/docker-compose.live.yml logs gateway) | ||
| # if [ -z "$LOGS" ]; then | ||
| # echo "ERROR: Gateway logs are empty" | ||
| # exit 1 | ||
| # fi | ||
| # ! grep -i "Deprecation Warning" <<< "$LOGS" | ||
| - name: Upload provider-proxy cache | ||
| # Only upload the cache when we're running from a 'good' run | ||
| # (running from the merge queue via `general.yml` or a cron job) | ||
| # This prevents manual workflow runs from modifying the cache | ||
| if: github.event_name == 'merge_group' || github.event_name == 'schedule' | ||
| run: | | ||
| AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY PROVIDER_PROXY_CACHE_BUCKET=provider-proxy-cache ./ci/upload-provider-proxy-cache.sh | ||
| client-tests: | ||
| needs: | ||
| [ | ||
| build-gateway-e2e-container, | ||
| build-gateway-container, | ||
| build-mock-provider-api-container, | ||
| build-fixtures-container, | ||
| build-provider-proxy-container, | ||
| ] | ||
| # Run even when the build jobs are skipped (merge_group case where general.yml already built them), | ||
| # but not after a failed or cancelled build. | ||
| # A job may have only one 'if:' key, so the repository check is part of this same expression. | ||
| if: always() && !failure() && !cancelled() && github.repository == 'tensorzero/tensorzero' | ||
| name: "client-tests (batch_writes: ${{ matrix.batch_writes }})" | ||
| runs-on: ubuntu-latest | ||
| permissions: | ||
| # Permission to checkout the repository | ||
| contents: read | ||
| # Permission to download artifacts | ||
| actions: read | ||
| timeout-minutes: 45 | ||
| strategy: | ||
| matrix: | ||
| # Each leg exports its value as TENSORZERO_CLICKHOUSE_BATCH_WRITES and, when true, appends a batch_writes section to the e2e config | ||
| batch_writes: [true, false] | ||
| # Don't fail-fast for manual/cron runs, so that we get the full picture of what broke | ||
| fail-fast: ${{ github.event_name == 'merge_group' }} | ||
| steps: | ||
| - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 | ||
| - name: Install gdb | ||
| run: sudo apt-get update && sudo apt-get install -y gdb | ||
| - name: Warm up Modal instances | ||
| run: | | ||
| curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--vllm-inference-vllm-inference.modal.run/docs > vllm_modal_logs.txt & | ||
| curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--sglang-0-4-10-inference-sglang-inference.modal.run/ > sglang_modal_logs.txt & | ||
| # TODO: Re-enable once we can switch to a T4 GPU | ||
| # curl -H "Modal-Key: $MODAL_KEY" -H "Modal-Secret: $MODAL_SECRET" https://tensorzero--vllm-gpt-oss-20b-serve.modal.run/ > vllm_gpt_oss_modal_logs.txt & | ||
| - name: Cleanup disk space | ||
| uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be | ||
| - name: Restore client-tests provider-proxy cache | ||
| if: github.event_name != 'schedule' | ||
| uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 | ||
| with: | ||
| path: ./ci/provider-proxy-cache/ | ||
| key: provider-proxy-cache-client-tests-${{ github.run_id }} | ||
| restore-keys: provider-proxy-cache-client-tests- | ||
| - name: Download client-tests provider-proxy cache | ||
| # When running as a cron job, don't use the provider-proxy cache. | ||
| # The cron job is used to gather information about provider flakiness. | ||
| if: github.event_name != 'schedule' | ||
| run: | | ||
| AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY PROVIDER_PROXY_CACHE_BUCKET=provider-proxy-cache-client-tests ./ci/download-provider-proxy-cache.sh | ||
| - name: Update Rust | ||
| run: | | ||
| for attempt in 1 2 3; do | ||
| if rustup update stable && rustup default stable; then | ||
| break | ||
| fi | ||
| if [ $attempt -eq 3 ]; then | ||
| echo "Failed to update Rust after 3 attempts" | ||
| exit 1 | ||
| fi | ||
| sleep $((10 * attempt)) | ||
| done | ||
| shell: bash | ||
| - name: Download gateway container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-gateway-container | ||
| - name: Download mock-provider-api container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-mock-provider-api-container | ||
| - name: Download fixtures container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-fixtures-container | ||
| - name: Download gateway-e2e container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-gateway-e2e-container | ||
| - name: Download provider-proxy container image | ||
| uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 | ||
| with: | ||
| name: build-provider-proxy-container | ||
| - name: Load container images | ||
| run: | | ||
| docker load < gateway-container.tar | ||
| docker load < mock-provider-api-container.tar | ||
| docker load < fixtures-container.tar | ||
| docker load < gateway-e2e-container.tar | ||
| docker load < provider-proxy-container.tar | ||
| - name: Install Rust toolchain | ||
| run: | | ||
| for attempt in 1 2 3; do | ||
| if rustup toolchain install stable && rustup default stable; then | ||
| break | ||
| fi | ||
| if [ $attempt -eq 3 ]; then | ||
| echo "Failed to install Rust toolchain after 3 attempts" | ||
| exit 1 | ||
| fi | ||
| sleep $((10 * attempt)) | ||
| done | ||
| shell: bash | ||
| # Start testing workload identity federation credentials once the SDK adds support: https://github.com/googleapis/google-cloud-rust/issues/1342 | ||
| # - uses: 'google-github-actions/auth@v2' | ||
| # with: | ||
| # project_id: 'tensozero-public' | ||
| # workload_identity_provider: 'projects/454541351720/locations/global/workloadIdentityPools/github/providers/tensorzero' | ||
| - name: Print Rust version | ||
| run: rustc --version | ||
| - name: Install uv | ||
| uses: astral-sh/setup-uv@61cb8a9741eeb8a550a1b8544337180c0fc8476b | ||
| with: | ||
| version: "0.9.27" | ||
| - name: Install pnpm | ||
| run: | | ||
| for attempt in 1 2 3; do | ||
| if npm install -g pnpm@latest; then | ||
| break | ||
| fi | ||
| if [ $attempt -eq 3 ]; then | ||
| echo "Failed to install pnpm after 3 attempts" | ||
| exit 1 | ||
| fi | ||
| sleep $((10 * attempt)) | ||
| done | ||
| shell: bash | ||
| - name: Install JS dependencies | ||
| run: pnpm install --frozen-lockfile | ||
| - name: Login to DockerHub | ||
| uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 | ||
| with: | ||
| username: ${{ secrets.DOCKERHUB_USERNAME }} | ||
| password: ${{ secrets.DOCKERHUB_TOKEN }} | ||
| - name: Install cargo-nextest | ||
| uses: taiki-e/install-action@60581cd7025e0e855cebd745379013e286d9c787 | ||
| with: | ||
| tool: cargo-nextest | ||
| - name: Write GCP JWT key to file | ||
| env: | ||
| GCP_JWT_KEY: ${{ secrets.GCP_JWT_KEY }} | ||
| run: echo "$GCP_JWT_KEY" > $GITHUB_WORKSPACE/gcp_jwt_key.json | ||
| - name: Set up database URLs for E2E tests | ||
| run: | | ||
| echo "TENSORZERO_CLICKHOUSE_URL=http://chuser:chpassword@localhost:8123/tensorzero_e2e_tests" >> $GITHUB_ENV | ||
| echo "TENSORZERO_CLICKHOUSE_BATCH_WRITES=${{ matrix.batch_writes }}" >> $GITHUB_ENV | ||
| echo "DATABASE_URL=postgresql://postgres:postgres@localhost:5432/tensorzero-e2e-tests" >> $GITHUB_ENV | ||
| echo "TENSORZERO_POSTGRES_URL=postgresql://postgres:postgres@localhost:5432/tensorzero-e2e-tests" >> $GITHUB_ENV | ||
| echo "TENSORZERO_VALKEY_URL=redis://localhost:6379" >> $GITHUB_ENV | ||
| echo "TENSORZERO_SKIP_LARGE_FIXTURES=1" >> $GITHUB_ENV | ||
| - name: Configure batch writes in tensorzero.toml | ||
| if: matrix.batch_writes == true | ||
| run: | | ||
| echo "[gateway.observability.batch_writes]" >> tensorzero-core/tests/e2e/config/tensorzero.misc.toml | ||
| echo "enabled = true" >> tensorzero-core/tests/e2e/config/tensorzero.misc.toml | ||
| echo "flush_interval_ms = 80" >> tensorzero-core/tests/e2e/config/tensorzero.misc.toml | ||
| echo "__force_allow_embedded_batch_writes = true" >> tensorzero-core/tests/e2e/config/tensorzero.misc.toml | ||
| - name: Launch dependency services for E2E tests | ||
| run: | | ||
| DOCKER_UID=$(id -u) DOCKER_GID=$(id -g) docker compose -f tensorzero-core/tests/e2e/docker-compose.yml --profile provider-proxy up --no-build -d --wait | ||
| - name: Print ClickHouse container logs | ||
| if: always() | ||
| run: | | ||
| docker compose -f tensorzero-core/tests/e2e/docker-compose.yml --profile provider-proxy logs -t | ||
| - name: Set up gateway environment file | ||
| run: | | ||
| # TensorZero config | ||
| echo "TENSORZERO_CLICKHOUSE_URL=${TENSORZERO_CLICKHOUSE_URL}" >> gateway.env | ||
| echo "TENSORZERO_CLICKHOUSE_BATCH_WRITES=${TENSORZERO_CLICKHOUSE_BATCH_WRITES}" >> gateway.env | ||
| echo "TENSORZERO_POSTGRES_URL=${TENSORZERO_POSTGRES_URL}" >> gateway.env | ||
| echo "TENSORZERO_VALKEY_URL=${TENSORZERO_VALKEY_URL}" >> gateway.env | ||
| echo "TENSORZERO_E2E_PROXY=${TENSORZERO_E2E_PROXY}" >> gateway.env | ||
| echo "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT}" >> gateway.env | ||
| echo "GCP_VERTEX_CREDENTIALS_PATH=/app/gcp_jwt_key.json" >> gateway.env | ||
| echo "GOOGLE_APPLICATION_CREDENTIALS=/app/gcp_jwt_key.json" >> gateway.env | ||
| # Provider API keys | ||
| echo "ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}" >> gateway.env | ||
| echo "AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}" >> gateway.env | ||
| echo "AWS_BEARER_TOKEN_BEDROCK=${AWS_BEARER_TOKEN_BEDROCK}" >> gateway.env | ||
| echo "AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}" >> gateway.env | ||
| echo "AWS_REGION=${AWS_REGION}" >> gateway.env | ||
| echo "AZURE_API_KEY=${AZURE_API_KEY}" >> gateway.env | ||
| echo "AZURE_OPENAI_EASTUS2_API_KEY=${AZURE_OPENAI_EASTUS2_API_KEY}" >> gateway.env | ||
| echo "AZURE_OPENAI_DEPLOYMENT_ID=${AZURE_OPENAI_DEPLOYMENT_ID}" >> gateway.env | ||
| echo "AZURE_AI_FOUNDRY_API_KEY=${AZURE_AI_FOUNDRY_API_KEY}" >> gateway.env | ||
| echo "DEEPSEEK_API_KEY=${DEEPSEEK_API_KEY}" >> gateway.env | ||
| echo "FIREWORKS_API_KEY=${FIREWORKS_API_KEY}" >> gateway.env | ||
| echo "FIREWORKS_ACCOUNT_ID=${FIREWORKS_ACCOUNT_ID}" >> gateway.env | ||
| echo "GCP_STORAGE_ACCESS_KEY_ID=${GCP_STORAGE_ACCESS_KEY_ID}" >> gateway.env | ||
| echo "GCP_STORAGE_SECRET_ACCESS_KEY=${GCP_STORAGE_SECRET_ACCESS_KEY}" >> gateway.env | ||
| echo "GOOGLE_AI_STUDIO_API_KEY=${GOOGLE_AI_STUDIO_API_KEY}" >> gateway.env | ||
| echo "GROQ_API_KEY=${GROQ_API_KEY}" >> gateway.env | ||
| echo "HYPERBOLIC_API_KEY=${HYPERBOLIC_API_KEY}" >> gateway.env | ||
| echo "MISTRAL_API_KEY=${MISTRAL_API_KEY}" >> gateway.env | ||
| echo "MODAL_KEY=${MODAL_KEY}" >> gateway.env | ||
| echo "MODAL_SECRET=${MODAL_SECRET}" >> gateway.env | ||
| echo "OPENAI_API_KEY=${OPENAI_API_KEY}" >> gateway.env | ||
| echo "OPENROUTER_API_KEY=${OPENROUTER_API_KEY}" >> gateway.env | ||
| echo "SGLANG_API_KEY=${SGLANG_API_KEY}" >> gateway.env | ||
| echo "TGI_API_KEY=${TGI_API_KEY}" >> gateway.env | ||
| echo "TOGETHER_API_KEY=${TOGETHER_API_KEY}" >> gateway.env | ||
| echo "VLLM_API_KEY=${VLLM_API_KEY}" >> gateway.env | ||
| echo "VOYAGE_API_KEY=${VOYAGE_API_KEY}" >> gateway.env | ||
| echo "XAI_API_KEY=${XAI_API_KEY}" >> gateway.env | ||
| - name: Run postgres migrations | ||
| run: | | ||
| docker run --rm --network host \ | ||
| --env-file gateway.env \ | ||
| tensorzero/gateway-e2e:sha-${{ github.sha }} \ | ||
| --run-postgres-migrations | ||
| - name: Launch the gateway for E2E tests | ||
| timeout-minutes: 2 | ||
| run: | | ||
| docker run -d --name gateway-e2e --network host \ | ||
| --env-file gateway.env \ | ||
| -v ${{ github.workspace }}/gcp_jwt_key.json:/app/gcp_jwt_key.json:ro \ | ||
| -v ${{ github.workspace }}/tensorzero-core:/app/tensorzero-core:ro \ | ||
| -v ${{ github.workspace }}/ui/fixtures:/app/ui/fixtures:ro \ | ||
| tensorzero/gateway-e2e:sha-${{ github.sha }} \ | ||
| --config-file '/app/tensorzero-core/tests/e2e/config/tensorzero.*.toml' | ||
| while ! curl -s -f http://localhost:3000/health >/dev/null 2>&1; do | ||
| echo "Waiting for gateway to be healthy..." | ||
| sleep 1 | ||
| done | ||
| - name: Install Python for python async client tests | ||
| run: uv python install 3.9 | ||
| - name: "Python: PyO3 Client: pytest (non-mock tests)" | ||
| working-directory: clients/python | ||
| run: | | ||
| # Start the test in background and capture its PID | ||
| bash ./test.sh --verbose -n 8 -m "not mock" & | ||
| TEST_PID=$! | ||
| echo "Started test.sh with PID: $TEST_PID" | ||
| # Wait for 10 minutes (600 seconds) | ||
| for i in {1..600}; do | ||
| if ! kill -0 $TEST_PID 2>/dev/null; then | ||
| echo "Test completed normally" | ||
| wait $TEST_PID | ||
| exit $? | ||
| fi | ||
| sleep 1 | ||
| done | ||
| echo "Test has been running for 10 minutes, capturing backtraces..." | ||
| # Get all processes related to our test | ||
| echo "=== Process tree ===" | ||
| ps -ef | grep -E "(test\.sh|pytest|python)" | grep -v grep || true | ||
| echo "=== Capturing backtraces with gdb ===" | ||
| # Find all python processes that might be related to our test | ||
| PYTHON_PIDS=$(pgrep -f "tensorzero.*python" || true) | ||
| if [ -n "$PYTHON_PIDS" ]; then | ||
| for pid in $PYTHON_PIDS; do | ||
| echo "--- Backtrace for Python process $pid ---" | ||
| gdb -p $pid --batch \ | ||
| -ex "set pagination off" \ | ||
| -ex "thread apply all bt" \ | ||
| -ex "info threads" \ | ||
| -ex "detach" \ | ||
| -ex "quit" 2>&1 || true | ||
| echo "" | ||
| done | ||
| else | ||
| echo "No Python processes found" | ||
| fi | ||
| exit 1 | ||
| - name: "Python: PyO3 Client: pytest (mock tests)" | ||
| working-directory: clients/python | ||
| env: | ||
| TENSORZERO_INTERNAL_MOCK_PROVIDER_API: http://localhost:3030 | ||
| run: | | ||
| bash ./test.sh --verbose -n 8 -m mock | ||
| - name: "Node.js: OpenAI Client: test" | ||
| working-directory: clients/openai-node | ||
| run: | | ||
| pnpm run test | ||
| - name: Install Go | ||
| uses: actions/setup-go@29694d72cd5e7ef3b09496b39f28a942af47737e | ||
| with: | ||
| go-version: "1.24" | ||
| - name: "Go: OpenAI Client: test" | ||
| working-directory: clients/openai-go/tests | ||
| run: go test -v | ||
| - name: "Python: Recipes: pytest" | ||
| working-directory: recipes | ||
| run: | | ||
| uv run pytest | ||
| - name: Print e2e logs | ||
| if: always() | ||
| run: docker logs gateway-e2e 2>&1 || echo "No gateway container logs available" | ||
| - name: Check e2e logs for impossible error messages | ||
| run: | | ||
| LOGS=$(docker logs gateway-e2e 2>&1) | ||
| # Empty logs mean the gateway never ran or logging broke, so the check below would pass vacuously | ||
| if [ -z "$LOGS" ]; then | ||
| echo "ERROR: Gateway logs are empty" | ||
| exit 1 | ||
| fi | ||
| # Fail when the marker IS present; grep prints the offending lines. | ||
| # ('grep --invert-match' exits 0 as soon as any line lacks the marker, so it could never fail here.) | ||
| if grep -i "please file a bug report" <<< "$LOGS"; then | ||
| echo "ERROR: Gateway logs contain an impossible error message" | ||
| exit 1 | ||
| fi | ||
| - name: Terminate the gateway container | ||
| if: always() | ||
| run: | | ||
| docker stop gateway-e2e || true | ||
| docker rm gateway-e2e || true | ||
| - name: Print provider-proxy logs | ||
| if: always() | ||
| run: docker compose -f tensorzero-core/tests/e2e/docker-compose.yml --profile provider-proxy logs provider-proxy 2>&1 || echo "No provider-proxy logs available" | ||
| - name: Print vLLM modal logs | ||
| if: always() | ||
| run: cat vllm_modal_logs.txt | ||
| - name: Print SGLang modal logs | ||
| if: always() | ||
| run: cat sglang_modal_logs.txt | ||
| - name: Print vLLM GPT-OSS modal logs | ||
| if: always() | ||
| continue-on-error: true | ||
| # The warm-up request that would write this file is commented out in the 'Warm up Modal instances' step, | ||
| # so print a placeholder instead of erroring when the file is absent. | ||
| run: cat vllm_gpt_oss_modal_logs.txt 2>/dev/null || echo "No vLLM GPT-OSS modal logs available" | ||
| - name: Upload client-tests provider-proxy cache | ||
| # Only upload the cache when we're running from a 'good' run | ||
| # (running from the merge queue via 'workflow_call' from general.yml, or a cron job) | ||
| # This prevents manual workflow runs from modifying the cache | ||
| if: github.event_name == 'merge_group' || github.event_name == 'schedule' | ||
| run: | | ||
| AWS_ACCESS_KEY_ID=$R2_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$R2_SECRET_ACCESS_KEY PROVIDER_PROXY_CACHE_BUCKET=provider-proxy-cache-client-tests ./ci/upload-provider-proxy-cache.sh | ||
| # See 'ci/README.md' at the repository root for more details. | ||
| # Single aggregate job that fails when any required test job failed, was cancelled, or was unexpectedly skipped. | ||
| check-all-tests-passed: | ||
| permissions: {} | ||
| # Must use the same repository gate as the jobs in 'needs'; with a different repository this job | ||
| # would either be skipped itself or only ever see skipped dependencies and fail. | ||
| if: always() && github.repository == 'tensorzero/tensorzero' | ||
| needs: [client-tests, live-tests] | ||
| runs-on: ubuntu-latest | ||
| steps: | ||
| # When running in the merge queue, jobs should never be skipped. | ||
| # In a scheduled run, some jobs may be intentionally skipped, as we only care about regenerating the model inference cache. | ||
| # NOTE(review): the condition below only tolerates skipped jobs for 'pull_request' events, not 'schedule' - confirm which is intended. | ||
| - if: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') || (github.event_name != 'pull_request' && contains(needs.*.result, 'skipped')) }} | ||
| run: exit 1 | ||
| # Post to Slack on failure for scheduled (cron) runs only. | ||
| # Scheduled runs bypass the provider-proxy cache to detect provider flakiness. | ||
| # See: https://github.com/tensorzero/tensorzero/issues/5380 | ||
| - name: Post to Slack on failure (scheduled runs only) | ||
| if: failure() && github.event_name == 'schedule' | ||
| uses: slackapi/slack-github-action@v2.1.1 | ||
| with: | ||
| method: chat.postMessage | ||
| token: ${{ secrets.SLACK_BOT_TOKEN }} | ||
| payload: | | ||
| channel: C09DM0RGDFG | ||
| text: "Live provider tests failed (scheduled run, no cache): <https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Run>" | ||