Merged
14 changes: 13 additions & 1 deletion .github/workflows/agent-provider-model-sync.yaml
@@ -71,6 +71,7 @@ jobs:

# Process

0. Before any discovery, use `mcp__github__search_issues` to find the 5 most recently opened issues with the `[BOT ISSUE]` title prefix. Record their titles. Do not create an issue whose gap duplicates any of these titles.
1. Read the local schema and model list to understand current naming, ordering, and provider mapping conventions.
2. For each provider, fetch the current official model list and check for: models missing locally, local models that are stale or deprecated, and missing or drifted pricing or token limits.
3. Prefer official provider APIs for discovery. Fall back to official documentation. Never use third-party aggregators.
@@ -79,6 +80,17 @@ jobs:
6. Do not report models that are deprecated or retired, or whose official retirement date falls within 90 days of today.
7. Do not report embedding models. Embedding models are not supported in the playground and should not be recommended for `packages/proxy/schema/model_list.json`.
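Step 2's comparison amounts to a set diff between the provider's official list and the local catalog. A minimal sketch — the shapes of `officialModels` (array of `{ id }`) and `localCatalog` (id-keyed object) are assumptions, not the bot's actual data model:

```javascript
// Hypothetical sketch of step 2: diffing a provider's official model list
// against the local catalog. Field and function names are illustrative.
function findGaps(officialModels, localCatalog) {
  const localIds = new Set(Object.keys(localCatalog));
  const officialIds = new Set(officialModels.map((m) => m.id));
  return {
    // Models the provider serves that the catalog lacks.
    missing: officialModels
      .filter((m) => !localIds.has(m.id))
      .map((m) => m.id),
    // Catalog entries the provider no longer lists (possibly deprecated).
    stale: [...localIds].filter((id) => !officialIds.has(id)),
  };
}
```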

# Issue priority order

When deciding which gaps to file first, use this order:

1. **New models** — models that exist on the provider's public inference surface but are absent from `model_list.json`
2. **Model cost updates** — stale or incorrect pricing or token limits for models already in the catalog
3. **Model deprecations** — local models that have been officially deprecated or retired by the provider
4. **New model providers** — provider families not yet represented in the catalog at all

Work through gaps in this priority order, filing higher-priority issues before lower-priority ones. If the run limit is reached, stop rather than filing a lower-priority issue ahead of a higher-priority one.
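The ordering above reduces to a stable sort over a fixed ranking plus a cutoff at the run limit. A hypothetical sketch — the `kind` tags and function names are illustrative, not the bot's actual implementation:

```javascript
// Priority ranking from the list above; lower index means file first.
const PRIORITY = ["new-model", "cost-update", "deprecation", "new-provider"];

function sortGaps(gaps) {
  return [...gaps].sort(
    (a, b) => PRIORITY.indexOf(a.kind) - PRIORITY.indexOf(b.kind),
  );
}

function selectGapsToFile(gaps, limit) {
  // Stop at the run limit instead of letting lower-priority gaps through.
  return sortGaps(gaps).slice(0, limit);
}
```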

**Provider-specific constraints:**

| Provider | Discovery | Pricing |
@@ -113,7 +125,7 @@ jobs:
- Do not invent values, defaults, or placeholder models.
- If a provider cannot be queried reliably in this run, skip it.
- If a source is ambiguous about a rename versus a new model, do not create an issue unless the gap itself is still concrete.
- Create at most 5 issues per run; at most one per distinct gap.
- Create at most 10 issues per run; at most one per distinct gap.
- Each issue must target exactly one provider and fit exactly one shape:
1. a few missing models for one provider
2. adding one provider to a few existing local models
6 changes: 6 additions & 0 deletions .github/workflows/fix-missing-model-bot-issues.yaml
@@ -528,6 +528,12 @@ jobs:
${{ steps.result.outputs.litellm_comparison_summary }}

${{ steps.result.outputs.pr_litellm_table }}
reviewers: |
knjiang
cpinn
erin2722
CLowbrow
aswink
labels: auto-sync
signoff: false

49 changes: 24 additions & 25 deletions .github/workflows/verify-deployed-models-on-vercel-comment.yaml
@@ -312,44 +312,46 @@ jobs:
- name: Post verification summary to PR
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
env:
FAILED_COUNT: ${{ needs.verify.outputs.failed_count }}
FAILED_MODELS_JSON: ${{ needs.verify.outputs.failed_models_json }}
PASSED_COUNT: ${{ needs.verify.outputs.passed_count }}
PASSED_MODELS_JSON: ${{ needs.verify.outputs.passed_models_json }}
PULL_REQUEST_NUMBER: ${{ needs.prepare.outputs.pull_request_number }}
TESTED_MODELS_JSON: ${{ needs.verify.outputs.tested_models_json }}
RESULTS_JSON: ${{ needs.verify.outputs.results_json }}
VERIFY_RESULT: ${{ needs.verify.result }}
with:
github-token: ${{ github.token }}
script: |
const marker = "<!-- proxy-model-verification -->";

function parseJsonArray(raw) {
function parseResults(raw) {
if (!raw) {
return [];
}

try {
const parsed = JSON.parse(raw);
return Array.isArray(parsed) ? parsed.filter((value) => typeof value === "string") : [];
return Array.isArray(parsed) ? parsed : [];
} catch (_error) {
return [];
}
}

function formatModels(models) {
if (models.length === 0) {
return "- None";
function formatTable(results) {
if (results.length === 0) {
return "_No models were tested._";
}

return models.map((model) => `- \`${model}\``).join("\n");
const rows = results.map((result) => {
const status = result.ok ? "✅ Passed" : "❌ Failed";
return `| \`${result.model}\` | ${status} |`;
});

return [
"| Model | Status |",
"| --- | --- |",
...rows,
].join("\n");
}

const testedModels = parseJsonArray(process.env.TESTED_MODELS_JSON);
const passedModels = parseJsonArray(process.env.PASSED_MODELS_JSON);
const failedModels = parseJsonArray(process.env.FAILED_MODELS_JSON);
const failedCount = Number(process.env.FAILED_COUNT || "0");
const passedCount = Number(process.env.PASSED_COUNT || "0");
const results = parseResults(process.env.RESULTS_JSON);
const failedCount = results.filter((r) => !r.ok).length;
const passedCount = results.filter((r) => r.ok).length;
const pullRequestNumber = Number(process.env.PULL_REQUEST_NUMBER || "0");
const verifyResult = process.env.VERIFY_RESULT || "unknown";

@@ -361,10 +363,12 @@
let statusLine = "Verification completed successfully.";
if (verifyResult === "failure" || failedCount > 0) {
statusLine = "Verification failed for one or more models.";
} else if (testedModels.length === 0) {
} else if (results.length === 0) {
statusLine = "No changed models required verification.";
}

const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;

const body = [
marker,
"## Proxy model verification",
@@ -374,14 +378,9 @@
`Passed: ${passedCount}`,
`Failed: ${failedCount}`,
"",
"Tested models",
formatModels(testedModels),
"",
"Passed models",
formatModels(passedModels),
formatTable(results),
"",
"Failed models",
formatModels(failedModels),
`[View job details](${runUrl})`,
].join("\n");

try {
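The rewritten script collapses the five separate count/list outputs into a single `RESULTS_JSON` array and derives the counts from it. A standalone sketch of that parsing and counting, assuming entries of the shape `{ model, ok }` as the new code implies:

```javascript
// Defensive JSON parsing, mirroring the diff: anything that is empty,
// unparseable, or not an array is treated as "no results".
function parseResults(raw) {
  if (!raw) {
    return [];
  }
  try {
    const parsed = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : [];
  } catch (_error) {
    return [];
  }
}

// Derive the pass/fail counts the comment body reports.
function summarize(results) {
  return {
    passedCount: results.filter((r) => r.ok).length,
    failedCount: results.filter((r) => !r.ok).length,
  };
}
```

One advantage of this shape: the counts can never disagree with the per-model table, since both are derived from the same array.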
2 changes: 2 additions & 0 deletions packages/proxy/schema/index.ts
@@ -607,6 +607,8 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = {
"grok-2-1212": ["xAI"],
"grok-vision-beta": ["xAI"],
"grok-beta": ["xAI"],
"magistral-medium-2509": ["mistral"],
"magistral-small-2509": ["mistral"],
"google/gemma-4-31B-it": ["together"],
"google/gemma-3n-E4B-it": ["together"],
"Qwen/Qwen3.5-9B": ["together"],
25 changes: 25 additions & 0 deletions packages/proxy/schema/model_list.json
@@ -4774,13 +4774,38 @@
"flavor": "chat",
"input_cost_per_mil_tokens": 0.5,
"output_cost_per_mil_tokens": 1.5,
"displayName": "Magistral Small (2506)",
"parent": "magistral-small-latest",
"max_input_tokens": 40000,
"max_output_tokens": 40000,
"available_providers": [
"mistral"
]
},
"magistral-medium-2509": {
"format": "openai",
"flavor": "chat",
"input_cost_per_mil_tokens": 2,
"output_cost_per_mil_tokens": 5,
"displayName": "Magistral Medium (2509)",
"parent": "magistral-medium-latest",
"max_input_tokens": 128000,
"available_providers": [
"mistral"
]
},
"magistral-small-2509": {
"format": "openai",
"flavor": "chat",
"input_cost_per_mil_tokens": 0.5,
"output_cost_per_mil_tokens": 1.5,
"displayName": "Magistral Small (2509)",
"parent": "magistral-small-latest",
"max_input_tokens": 128000,
"available_providers": [
"mistral"
]
},
"devstral-small-latest": {
"format": "openai",
"flavor": "chat",
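For context on the pricing fields in these entries: `input_cost_per_mil_tokens` and `output_cost_per_mil_tokens` are stated per million tokens, so a request's dollar cost follows directly. A hedged sketch — `requestCostUsd` is illustrative, not part of the schema:

```javascript
// The prices below copy magistral-medium-2509's entry from the diff.
const magistralMedium = {
  input_cost_per_mil_tokens: 2,
  output_cost_per_mil_tokens: 5,
};

// Convert per-million-token prices into the cost of one request.
function requestCostUsd(entry, inputTokens, outputTokens) {
  return (
    (inputTokens * entry.input_cost_per_mil_tokens +
      outputTokens * entry.output_cost_per_mil_tokens) /
    1e6
  );
}

// 10,000 input tokens and 2,000 output tokens:
// (10000 * 2 + 2000 * 5) / 1e6 = 0.03 USD
const cost = requestCostUsd(magistralMedium, 10000, 2000);
```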