diff --git a/.github/workflows/verify-deployed-models.yaml b/.github/workflows/verify-deployed-models.yaml
index 60d83bde..ed13af65 100644
--- a/.github/workflows/verify-deployed-models.yaml
+++ b/.github/workflows/verify-deployed-models.yaml
@@ -120,6 +120,7 @@ jobs:
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
         with:
           fetch-depth: 0
+          ref: ${{ inputs.head_sha != '' && inputs.head_sha || inputs.head_ref }}
 
       - name: Set up Node.js
         uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
diff --git a/packages/proxy/schema/index.ts b/packages/proxy/schema/index.ts
index 95d2d2cd..750dbaf5 100644
--- a/packages/proxy/schema/index.ts
+++ b/packages/proxy/schema/index.ts
@@ -607,6 +607,9 @@ export const AvailableEndpointTypes: { [name: string]: ModelEndpointType[] } = {
   "grok-2-1212": ["xAI"],
   "grok-vision-beta": ["xAI"],
   "grok-beta": ["xAI"],
+  "Qwen/Qwen3.6-Plus": ["together"],
+  "zai-org/GLM-5.1": ["together"],
+  "MiniMaxAI/MiniMax-M2.7": ["together"],
   "gemini-3.1-flash-image-preview": ["google", "vertex"],
   "gemini-2.5-flash-image": ["google", "vertex"],
   "mistral-medium-2508": ["mistral"],
diff --git a/packages/proxy/schema/model_list.json b/packages/proxy/schema/model_list.json
index 2f9e6ad2..fdabe091 100644
--- a/packages/proxy/schema/model_list.json
+++ b/packages/proxy/schema/model_list.json
@@ -4665,6 +4665,19 @@
       "together"
     ]
   },
+  "Qwen/Qwen3.6-Plus": {
+    "format": "openai",
+    "flavor": "chat",
+    "input_cost_per_mil_tokens": 0.5,
+    "output_cost_per_mil_tokens": 3,
+    "displayName": "Qwen3.6 Plus",
+    "max_input_tokens": 1000000,
+    "supports_streaming": true,
+    "streaming_only": true,
+    "available_providers": [
+      "together"
+    ]
+  },
   "magistral-medium-latest": {
     "format": "openai",
     "flavor": "chat",
@@ -9578,6 +9591,19 @@
       "baseten"
     ]
   },
+  "zai-org/GLM-5.1": {
+    "format": "openai",
+    "flavor": "chat",
+    "input_cost_per_mil_tokens": 1.4,
+    "output_cost_per_mil_tokens": 4.4,
+    "displayName": "GLM 5.1",
+    "reasoning": true,
+    "max_input_tokens": 202752,
+    "max_output_tokens": 128000,
+    "available_providers": [
+      "together"
+    ]
+  },
   "accounts/fireworks/models/glm-4p5": {
     "format": "openai",
     "flavor": "chat",
@@ -9747,6 +9773,18 @@
       "baseten"
     ]
   },
+  "MiniMaxAI/MiniMax-M2.7": {
+    "format": "openai",
+    "flavor": "chat",
+    "input_cost_per_mil_tokens": 0.3,
+    "output_cost_per_mil_tokens": 1.2,
+    "input_cache_read_cost_per_mil_tokens": 0.06,
+    "displayName": "MiniMax M2.7",
+    "max_input_tokens": 202752,
+    "available_providers": [
+      "together"
+    ]
+  },
   "accounts/fireworks/models/minimax-m2p1": {
     "format": "openai",
     "flavor": "chat",
diff --git a/packages/proxy/schema/models.ts b/packages/proxy/schema/models.ts
index 8da75b1f..7aac8d33 100644
--- a/packages/proxy/schema/models.ts
+++ b/packages/proxy/schema/models.ts
@@ -96,6 +96,16 @@ export const ModelSchema = z.object({
     .number()
     .nullish()
     .describe("The model supports a maximum output token limit."),
+  supports_streaming: z
+    .boolean()
+    .nullish()
+    .describe("The model supports native streaming responses."),
+  streaming_only: z
+    .boolean()
+    .nullish()
+    .describe(
+      "The upstream provider requires requests for this model to be sent with streaming enabled.",
+    ),
   available_providers: z.array(z.enum(ModelEndpointType)).nullish(),
 });
 
diff --git a/packages/proxy/scripts/verify_proxy_models.test.ts b/packages/proxy/scripts/verify_proxy_models.test.ts
index c91d265c..af4d5d40 100644
--- a/packages/proxy/scripts/verify_proxy_models.test.ts
+++ b/packages/proxy/scripts/verify_proxy_models.test.ts
@@ -23,6 +23,30 @@ describe("buildVerificationRequest", () => {
       endpoint: "chat/completions",
     });
   });
+
+  it("forces stream mode for streaming-only models", () => {
+    expect(
+      buildVerificationRequest("Qwen/Qwen3.6-Plus", {
+        "Qwen/Qwen3.6-Plus": {
+          available_providers: ["together"],
+          format: "openai",
+          streaming_only: true,
+        },
+      }),
+    ).toEqual({
+      body: {
+        messages: [
+          {
+            content: "ok",
+            role: "user",
+          },
+        ],
+        model: "Qwen/Qwen3.6-Plus",
+        stream: true,
+      },
+      endpoint: "chat/completions",
+    });
+  });
 });
 
 describe("extractErrorMessage", () => {
diff --git a/packages/proxy/scripts/verify_proxy_models.ts b/packages/proxy/scripts/verify_proxy_models.ts
index 0eb28449..0de0d33a 100644
--- a/packages/proxy/scripts/verify_proxy_models.ts
+++ b/packages/proxy/scripts/verify_proxy_models.ts
@@ -37,6 +37,7 @@ type VerificationModelSpec = {
   available_providers?: ModelEndpointType[];
   endpoint_types?: ModelEndpointType[];
   format?: ModelFormat;
+  streaming_only?: boolean;
 };
 
 type ModelCatalog = Record<string, VerificationModelSpec>;
@@ -187,7 +188,11 @@ export function resolveVercelProtectionBypassSecret(
   return secret;
 }
 
-export function buildVerificationRequest(model: string): VerificationRequest {
+export function buildVerificationRequest(
+  model: string,
+  modelCatalog: ModelCatalog = readModelCatalog(),
+): VerificationRequest {
+  const modelSpec = modelCatalog[model];
   return {
     endpoint: "chat/completions",
     body: {
@@ -198,6 +203,7 @@ export function buildVerificationRequest(model: string): VerificationRequest {
         },
       ],
       model,
+      ...(modelSpec?.streaming_only ? { stream: true } : {}),
     },
   };
 }
@@ -227,11 +233,12 @@ export function extractErrorMessage(responseBody: string): string {
 async function verifyModel(args: {
   apiKey: string;
   model: string;
+  modelCatalog: ModelCatalog;
   proxyBaseUrl: string;
   timeoutMs: number;
   vercelProtectionBypassSecret: string;
 }): Promise {
-  const request = buildVerificationRequest(args.model);
+  const request = buildVerificationRequest(args.model, args.modelCatalog);
   const url = new URL(request.endpoint, withTrailingSlash(args.proxyBaseUrl));
   const controller = new AbortController();
   const timeout = setTimeout(() => controller.abort(), args.timeoutMs);
@@ -353,6 +360,7 @@ async function main(): Promise {
     result = await verifyModel({
       apiKey,
       model,
+      modelCatalog,
       proxyBaseUrl: argv["proxy-base-url"],
       timeoutMs: argv["timeout-ms"],
       vercelProtectionBypassSecret,