Merged
1 change: 1 addition & 0 deletions .github/workflows/verify-deployed-models.yaml
@@ -120,6 +120,7 @@ jobs:
    uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
    with:
      fetch-depth: 0
+     ref: ${{ inputs.head_sha != '' && inputs.head_sha || inputs.head_ref }}

- name: Set up Node.js
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
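The added `ref` input pins the checkout to the exact commit under test. GitHub Actions expressions have no ternary operator, so the workflow uses the conventional `cond && a || b` fallback idiom. A minimal TypeScript sketch of the selection logic that expression encodes (the `inputs` object here is a stand-in for the workflow inputs, not real code from this repo):

```ts
// Stand-in for the workflow's inputs; in the real workflow these arrive via
// workflow_call / workflow_dispatch.
const inputs = { head_sha: "", head_ref: "refs/heads/main" };

// `inputs.head_sha != '' && inputs.head_sha || inputs.head_ref` is the
// Actions idiom for this ternary: prefer the exact SHA when one was passed,
// otherwise fall back to the branch ref. (The idiom is safe here because a
// non-empty SHA string is always truthy.)
const ref = inputs.head_sha !== "" ? inputs.head_sha : inputs.head_ref;
console.log(ref); // "refs/heads/main", since head_sha is empty in this sketch
```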
3 changes: 3 additions & 0 deletions packages/proxy/schema/index.ts
@@ -607,6 +607,9 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = {
"grok-2-1212": ["xAI"],
"grok-vision-beta": ["xAI"],
"grok-beta": ["xAI"],
"Qwen/Qwen3.6-Plus": ["together"],
"zai-org/GLM-5.1": ["together"],
"MiniMaxAI/MiniMax-M2.7": ["together"],
"gemini-3.1-flash-image-preview": ["google", "vertex"],
"gemini-2.5-flash-image": ["google", "vertex"],
"mistral-medium-2508": ["mistral"],
38 changes: 38 additions & 0 deletions packages/proxy/schema/model_list.json
@@ -4665,6 +4665,19 @@
"together"
]
},
"Qwen/Qwen3.6-Plus": {
"format": "openai",
"flavor": "chat",
"input_cost_per_mil_tokens": 0.5,
"output_cost_per_mil_tokens": 3,
"displayName": "Qwen3.6 Plus",
"max_input_tokens": 1000000,
"supports_streaming": true,
"streaming_only": true,
"available_providers": [
"together"
]
},
"magistral-medium-latest": {
"format": "openai",
"flavor": "chat",
@@ -9578,6 +9591,19 @@
"baseten"
]
},
"zai-org/GLM-5.1": {
"format": "openai",
"flavor": "chat",
"input_cost_per_mil_tokens": 1.4,
"output_cost_per_mil_tokens": 4.4,
"displayName": "GLM 5.1",
"reasoning": true,
"max_input_tokens": 202752,
"max_output_tokens": 128000,
"available_providers": [
"together"
]
},
"accounts/fireworks/models/glm-4p5": {
"format": "openai",
"flavor": "chat",
@@ -9747,6 +9773,18 @@
"baseten"
]
},
"MiniMaxAI/MiniMax-M2.7": {
"format": "openai",
"flavor": "chat",
"input_cost_per_mil_tokens": 0.3,
"output_cost_per_mil_tokens": 1.2,
"input_cache_read_cost_per_mil_tokens": 0.06,
"displayName": "MiniMax M2.7",
"max_input_tokens": 202752,
"available_providers": [
"together"
]
},
"accounts/fireworks/models/minimax-m2p1": {
"format": "openai",
"flavor": "chat",
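For readers skimming the pricing fields: the `*_cost_per_mil_tokens` values are USD per million tokens, and `input_cache_read_cost_per_mil_tokens` is the discounted rate for prompt tokens served from cache. A quick worked example for the MiniMax-M2.7 entry (assuming, as is conventional, that cached tokens are billed at the cache-read rate instead of, not in addition to, the normal input rate):

```ts
// Hypothetical cost check using the MiniMaxAI/MiniMax-M2.7 fields above.
const pricing = {
  inputPerMil: 0.3, // USD per 1M uncached input tokens
  cachedInputPerMil: 0.06, // USD per 1M cache-read input tokens
  outputPerMil: 1.2, // USD per 1M output tokens
};

const usage = { inputTokens: 10_000, cachedInputTokens: 2_000, outputTokens: 1_000 };

const costUsd =
  ((usage.inputTokens - usage.cachedInputTokens) * pricing.inputPerMil +
    usage.cachedInputTokens * pricing.cachedInputPerMil +
    usage.outputTokens * pricing.outputPerMil) /
  1_000_000;

console.log(costUsd.toFixed(6)); // 0.003720
```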
10 changes: 10 additions & 0 deletions packages/proxy/schema/models.ts
@@ -96,6 +96,16 @@ export const ModelSchema = z.object({
.number()
.nullish()
.describe("The model supports a maximum output token limit."),
+  supports_streaming: z
+    .boolean()
+    .nullish()
+    .describe("The model supports native streaming responses."),
+  streaming_only: z
+    .boolean()
+    .nullish()
+    .describe(
+      "The upstream provider requires requests for this model to be sent with streaming enabled.",
+    ),
available_providers: z.array(z.enum(ModelEndpointType)).nullish(),
});

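Both new fields are `.nullish()`, so every existing entry in model_list.json keeps validating unchanged; only models that need the flags declare them. A minimal sketch of the extended schema in use (the import path is assumed, and the `legacy` example assumes no other fields are required):

```ts
import { ModelSchema } from "../schema/models"; // assumed path within packages/proxy

// The new Qwen entry from model_list.json parses with both flags present...
const qwen = ModelSchema.parse({
  format: "openai",
  flavor: "chat",
  input_cost_per_mil_tokens: 0.5,
  output_cost_per_mil_tokens: 3,
  displayName: "Qwen3.6 Plus",
  max_input_tokens: 1000000,
  supports_streaming: true,
  streaming_only: true,
  available_providers: ["together"],
});

// ...and entries that omit them still parse, since .nullish() accepts
// undefined/null (assuming no other required fields beyond these).
const legacy = ModelSchema.parse({ format: "openai", flavor: "chat" });
console.log(qwen.streaming_only, legacy.streaming_only); // true undefined
```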
24 changes: 24 additions & 0 deletions packages/proxy/scripts/verify_proxy_models.test.ts
@@ -23,6 +23,30 @@ describe("buildVerificationRequest", () => {
endpoint: "chat/completions",
});
});

it("forces stream mode for streaming-only models", () => {
expect(
buildVerificationRequest("Qwen/Qwen3.6-Plus", {
"Qwen/Qwen3.6-Plus": {
available_providers: ["together"],
format: "openai",
streaming_only: true,
},
}),
).toEqual({
body: {
messages: [
{
content: "ok",
role: "user",
},
],
model: "Qwen/Qwen3.6-Plus",
stream: true,
},
endpoint: "chat/completions",
});
});
});

describe("extractErrorMessage", () => {
12 changes: 10 additions & 2 deletions packages/proxy/scripts/verify_proxy_models.ts
@@ -37,6 +37,7 @@ type VerificationModelSpec = {
available_providers?: ModelEndpointType[];
endpoint_types?: ModelEndpointType[];
format?: ModelFormat;
+  streaming_only?: boolean;
};

type ModelCatalog = Record<string, VerificationModelSpec>;
@@ -187,7 +188,11 @@ export function resolveVercelProtectionBypassSecret(
return secret;
}

-export function buildVerificationRequest(model: string): VerificationRequest {
+export function buildVerificationRequest(
+  model: string,
+  modelCatalog: ModelCatalog = readModelCatalog(),
+): VerificationRequest {
+  const modelSpec = modelCatalog[model];
return {
endpoint: "chat/completions",
body: {
@@ -198,6 +203,7 @@ export function buildVerificationRequest(model: string): VerificationRequest {
},
],
model,
+      ...(modelSpec?.streaming_only ? { stream: true } : {}),
},
};
}
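Two small design points in this change: `modelCatalog` defaults to `readModelCatalog()`, so existing call sites keep working while tests can inject a catalog, and the conditional spread adds `stream: true` only when the flag is set, leaving every other model's request body untouched. A standalone illustration of the spread idiom (none of these names come from the proxy code):

```ts
// `...(cond ? { stream: true } : {})` merges the key only when cond is truthy.
const streamingOnly = true;
const body = {
  model: "example-model",
  messages: [{ role: "user", content: "ok" }],
  ...(streamingOnly ? { stream: true } : {}),
};

console.log("stream" in body); // true; with streamingOnly = false the key is
// absent entirely rather than present as `undefined`, so the serialized JSON
// body stays byte-for-byte unchanged for non-streaming models.
```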
@@ -227,11 +233,12 @@ export function extractErrorMessage(responseBody: string): string {
async function verifyModel(args: {
apiKey: string;
model: string;
+  modelCatalog: ModelCatalog;
proxyBaseUrl: string;
timeoutMs: number;
vercelProtectionBypassSecret: string;
}): Promise<VerificationResult> {
-  const request = buildVerificationRequest(args.model);
+  const request = buildVerificationRequest(args.model, args.modelCatalog);
const url = new URL(request.endpoint, withTrailingSlash(args.proxyBaseUrl));
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), args.timeoutMs);
@@ -353,6 +360,7 @@ async function main(): Promise<void> {
result = await verifyModel({
apiKey,
model,
+      modelCatalog,
proxyBaseUrl: argv["proxy-base-url"],
timeoutMs: argv["timeout-ms"],
vercelProtectionBypassSecret,