diff --git a/.github/workflows/agent-provider-model-sync.yaml b/.github/workflows/agent-provider-model-sync.yaml index 9b841dfb..6739f861 100644 --- a/.github/workflows/agent-provider-model-sync.yaml +++ b/.github/workflows/agent-provider-model-sync.yaml @@ -71,6 +71,7 @@ jobs: # Process + 0. Before any discovery, use `mcp__github__search_issues` to find the 10 most recently opened issues with the `[BOT ISSUE]` title prefix. Record their titles. Do not create an issue for a gap that is already covered by any of these titles. 1. Read the local schema and model list to understand current naming, ordering, and provider mapping conventions. 2. For each provider, fetch the current official model list and check for: models missing locally, local models that are stale or deprecated, and missing or drifted pricing or token limits. 3. Prefer official provider APIs for discovery. Fall back to official documentation. Never use third-party aggregators. @@ -79,6 +80,17 @@ jobs: 6. Do not report models that are deprecated, retired, or have an official retirement date within 90 days of today. 7. Do not report embedding models. Embedding models are not supported in the playground and should not be recommended for `packages/proxy/schema/model_list.json`. + # Issue priority order + + When deciding which gaps to file first, use this order: + + 1. **New models** — models that exist on the provider's public inference surface but are absent from `model_list.json` + 2. **Model cost updates** — stale or incorrect pricing or token limits for models already in the catalog + 3. **Model deprecations** — local models that have been officially deprecated or retired by the provider + 4. **New model providers** — provider families not yet represented in the catalog at all + + Work through gaps in this priority order across all providers. File higher-priority issues before lower-priority ones. If the run limit is reached, stop rather than filing lower-priority issues ahead of higher-priority ones. 
+ **Provider-specific constraints:** | Provider | Discovery | Pricing | @@ -113,7 +125,7 @@ jobs: - Do not invent values, defaults, or placeholder models. - If a provider cannot be queried reliably in this run, skip it. - If a source is ambiguous about a rename versus a new model, do not create an issue unless the gap itself is still concrete. - - Create at most 5 issues per run; at most one per distinct gap. + - Create at most 10 issues per run; at most one per distinct gap. - Each issue must target exactly one provider and fit exactly one shape: 1. a few missing models for one provider 2. adding one provider to a few existing local models diff --git a/.github/workflows/fix-missing-model-bot-issues.yaml b/.github/workflows/fix-missing-model-bot-issues.yaml index 7c714402..ed5ea7d3 100644 --- a/.github/workflows/fix-missing-model-bot-issues.yaml +++ b/.github/workflows/fix-missing-model-bot-issues.yaml @@ -528,6 +528,12 @@ jobs: ${{ steps.result.outputs.litellm_comparison_summary }} ${{ steps.result.outputs.pr_litellm_table }} + reviewers: | + knjiang + cpinn + erin2722 + CLowbrow + aswink labels: auto-sync signoff: false diff --git a/.github/workflows/verify-deployed-models-on-vercel-comment.yaml b/.github/workflows/verify-deployed-models-on-vercel-comment.yaml index a6012293..50fb67ba 100644 --- a/.github/workflows/verify-deployed-models-on-vercel-comment.yaml +++ b/.github/workflows/verify-deployed-models-on-vercel-comment.yaml @@ -312,44 +312,46 @@ jobs: - name: Post verification summary to PR uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0 env: - FAILED_COUNT: ${{ needs.verify.outputs.failed_count }} - FAILED_MODELS_JSON: ${{ needs.verify.outputs.failed_models_json }} - PASSED_COUNT: ${{ needs.verify.outputs.passed_count }} - PASSED_MODELS_JSON: ${{ needs.verify.outputs.passed_models_json }} PULL_REQUEST_NUMBER: ${{ needs.prepare.outputs.pull_request_number }} - TESTED_MODELS_JSON: ${{ needs.verify.outputs.tested_models_json }} + 
RESULTS_JSON: ${{ needs.verify.outputs.results_json }} VERIFY_RESULT: ${{ needs.verify.result }} with: github-token: ${{ github.token }} script: | const marker = ""; - function parseJsonArray(raw) { + function parseResults(raw) { if (!raw) { return []; } - try { const parsed = JSON.parse(raw); - return Array.isArray(parsed) ? parsed.filter((value) => typeof value === "string") : []; + return Array.isArray(parsed) ? parsed : []; } catch (_error) { return []; } } - function formatModels(models) { - if (models.length === 0) { - return "- None"; + function formatTable(results) { + if (results.length === 0) { + return "_No models were tested._"; } - return models.map((model) => `- \`${model}\``).join("\n"); + const rows = results.map((result) => { + const status = result.ok ? "✅ Passed" : "❌ Failed"; + return `| \`${result.model}\` | ${status} |`; + }); + + return [ + "| Model | Status |", + "| --- | --- |", + ...rows, + ].join("\n"); } - const testedModels = parseJsonArray(process.env.TESTED_MODELS_JSON); - const passedModels = parseJsonArray(process.env.PASSED_MODELS_JSON); - const failedModels = parseJsonArray(process.env.FAILED_MODELS_JSON); - const failedCount = Number(process.env.FAILED_COUNT || "0"); - const passedCount = Number(process.env.PASSED_COUNT || "0"); + const results = parseResults(process.env.RESULTS_JSON); + const failedCount = results.filter((r) => !r.ok).length; + const passedCount = results.filter((r) => r.ok).length; const pullRequestNumber = Number(process.env.PULL_REQUEST_NUMBER || "0"); const verifyResult = process.env.VERIFY_RESULT || "unknown"; @@ -361,10 +363,12 @@ jobs: let statusLine = "Verification completed successfully."; if (verifyResult === "failure" || failedCount > 0) { statusLine = "Verification failed for one or more models."; - } else if (testedModels.length === 0) { + } else if (results.length === 0) { statusLine = "No changed models required verification."; } + const runUrl = 
`https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`; + const body = [ marker, "## Proxy model verification", @@ -374,14 +378,9 @@ jobs: `Passed: ${passedCount}`, `Failed: ${failedCount}`, "", - "Tested models", - formatModels(testedModels), - "", - "Passed models", - formatModels(passedModels), + formatTable(results), "", - "Failed models", - formatModels(failedModels), + `[View job details](${runUrl})`, ].join("\n"); try { diff --git a/packages/proxy/schema/index.ts b/packages/proxy/schema/index.ts index dc89ba30..1919178f 100644 --- a/packages/proxy/schema/index.ts +++ b/packages/proxy/schema/index.ts @@ -607,6 +607,8 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = { "grok-2-1212": ["xAI"], "grok-vision-beta": ["xAI"], "grok-beta": ["xAI"], + "magistral-medium-2509": ["mistral"], + "magistral-small-2509": ["mistral"], "google/gemma-4-31B-it": ["together"], "google/gemma-3n-E4B-it": ["together"], "Qwen/Qwen3.5-9B": ["together"], diff --git a/packages/proxy/schema/model_list.json b/packages/proxy/schema/model_list.json index 31fd0dbd..146021ce 100644 --- a/packages/proxy/schema/model_list.json +++ b/packages/proxy/schema/model_list.json @@ -4774,6 +4774,7 @@ "flavor": "chat", "input_cost_per_mil_tokens": 0.5, "output_cost_per_mil_tokens": 1.5, + "displayName": "Magistral Small (2506)", "parent": "magistral-small-latest", "max_input_tokens": 40000, "max_output_tokens": 40000, @@ -4781,6 +4782,30 @@ "mistral" ] }, + "magistral-medium-2509": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 2, + "output_cost_per_mil_tokens": 5, + "displayName": "Magistral Medium (2509)", + "parent": "magistral-medium-latest", + "max_input_tokens": 128000, + "available_providers": [ + "mistral" + ] + }, + "magistral-small-2509": { + "format": "openai", + "flavor": "chat", + "input_cost_per_mil_tokens": 0.5, + "output_cost_per_mil_tokens": 1.5, + "displayName": "Magistral Small (2509)", 
+ "parent": "magistral-small-latest", + "max_input_tokens": 128000, + "available_providers": [ + "mistral" + ] + }, "devstral-small-latest": { "format": "openai", "flavor": "chat",