diff --git a/src/app/api/openrouter/[...path]/route.ts b/src/app/api/openrouter/[...path]/route.ts
index 4f16bc3af..d703e43a3 100644
--- a/src/app/api/openrouter/[...path]/route.ts
+++ b/src/app/api/openrouter/[...path]/route.ts
@@ -14,7 +14,6 @@ import {
   isFreeModel,
   isDataCollectionRequiredOnKiloCodeOnly,
   isDeadFreeModel,
-  isSlackbotOnlyModel,
   isRateLimitedModel,
 } from '@/lib/models';
 import {
@@ -68,9 +67,6 @@ import { isActiveReviewPromo } from '@/lib/code-reviews/core/constants';
 
 const MAX_TOKENS_LIMIT = 99999999999; // GPT4.1 default is ~32k
 
-const OPUS = CLAUDE_OPUS_CURRENT_MODEL_ID;
-const SONNET = CLAUDE_SONNET_CURRENT_MODEL_ID;
-
 const PAID_MODEL_AUTH_REQUIRED = 'PAID_MODEL_AUTH_REQUIRED';
 const PROMOTION_MODEL_LIMIT_REACHED = 'PROMOTION_MODEL_LIMIT_REACHED';
 
@@ -78,19 +74,19 @@ const PROMOTION_MODEL_LIMIT_REACHED = 'PROMOTION_MODEL_LIMIT_REACHED';
 // Add/remove/modify entries here to change routing behavior.
 const MODE_TO_MODEL = new Map([
   // Opus modes (planning, reasoning, orchestration, debugging)
-  ['plan', OPUS],
-  ['general', OPUS],
-  ['architect', OPUS],
-  ['orchestrator', OPUS],
-  ['ask', OPUS],
-  ['debug', OPUS],
+  ['plan', CLAUDE_OPUS_CURRENT_MODEL_ID],
+  ['general', CLAUDE_OPUS_CURRENT_MODEL_ID],
+  ['architect', CLAUDE_OPUS_CURRENT_MODEL_ID],
+  ['orchestrator', CLAUDE_OPUS_CURRENT_MODEL_ID],
+  ['ask', CLAUDE_OPUS_CURRENT_MODEL_ID],
+  ['debug', CLAUDE_OPUS_CURRENT_MODEL_ID],
   // Sonnet modes (implementation, exploration)
-  ['build', SONNET],
-  ['explore', SONNET],
-  ['code', SONNET],
+  ['build', CLAUDE_SONNET_CURRENT_MODEL_ID],
+  ['explore', CLAUDE_SONNET_CURRENT_MODEL_ID],
+  ['code', CLAUDE_SONNET_CURRENT_MODEL_ID],
 ]);
 
-const DEFAULT_AUTO_MODEL = SONNET;
+const DEFAULT_AUTO_MODEL = CLAUDE_SONNET_CURRENT_MODEL_ID;
 
 function resolveAutoModel(modeHeader: string | null) {
   const mode = modeHeader?.trim().toLowerCase() ?? 'build';
@@ -186,14 +182,12 @@ export async function POST(request: NextRequest): Promise
diff --git a/src/lib/models.ts b/src/lib/models.ts
--- a/src/lib/models.ts
+++ b/src/lib/models.ts
@@ ... @@
-  return kiloFreeModels.some(m => m.public_id === model && m.is_enabled && !m.slackbot_only);
+  return kiloFreeModels.some(m => m.public_id === model && m.is_enabled);
 }
 
 export function isDataCollectionRequiredOnKiloCodeOnly(model: string): boolean {
@@ -62,11 +61,8 @@ export const kiloFreeModels = [
   corethink_free_model,
   giga_potato_model,
   giga_potato_thinking_model,
-  minimax_m21_free_model,
   minimax_m25_free_model,
-  opus_46_free_slackbot_model,
   grok_code_fast_1_optimized_free_model,
-  zai_glm47_free_model,
   zai_glm5_free_model,
 ] as KiloFreeModel[];
 
@@ -87,12 +83,3 @@ export function extraRequiredProviders(model: string) {
 export function isDeadFreeModel(model: string): boolean {
   return !!kiloFreeModels.find(m => m.public_id === model && !m.is_enabled);
 }
-
-/**
- * Check if a model is only available through Kilo for Slack (internalApiUse).
- * These models are hidden from the public model list and return "model does not exist"
- * when accessed outside of the Slack integration.
- */
-export function isSlackbotOnlyModel(model: string): boolean {
-  return !!kiloFreeModels.find(m => m.public_id === model && m.slackbot_only);
-}
diff --git a/src/lib/providerHash.test.ts b/src/lib/providerHash.test.ts
index 92c481eca..f4360124c 100644
--- a/src/lib/providerHash.test.ts
+++ b/src/lib/providerHash.test.ts
@@ -6,9 +6,9 @@ describe('generateProviderSpecificHash', () => {
 
   it('should generate different hashes for different providers', () => {
     const openRouterHash = generateProviderSpecificHash(testUserId, PROVIDERS.OPENROUTER);
-    const grokHash = generateProviderSpecificHash(testUserId, PROVIDERS.XAI);
+    const vercelHash = generateProviderSpecificHash(testUserId, PROVIDERS.VERCEL_AI_GATEWAY);
 
-    expect(openRouterHash).not.toBe(grokHash);
+    expect(openRouterHash).not.toBe(vercelHash);
   });
 
   it('should generate consistent hashes for the same provider and user', () => {
@@ -26,7 +26,7 @@ describe('generateProviderSpecificHash', () => {
   });
 
   it('should return a base64 encoded string', () => {
-    const hash = generateProviderSpecificHash(testUserId, PROVIDERS.XAI);
+    const hash = generateProviderSpecificHash(testUserId, PROVIDERS.VERCEL_AI_GATEWAY);
 
     // Base64 pattern check
     expect(hash).toMatch(/^[A-Za-z0-9+/]+=*$/);
diff --git a/src/lib/providers/anthropic.ts b/src/lib/providers/anthropic.ts
index 921ba5685..381a76118 100644
--- a/src/lib/providers/anthropic.ts
+++ b/src/lib/providers/anthropic.ts
@@ -1,4 +1,3 @@
-import type { KiloFreeModel } from '@/lib/providers/kilo-free-model';
 import type { OpenRouterChatCompletionRequest } from '@/lib/providers/openrouter/types';
 import { normalizeToolCallIds } from '@/lib/tool-calling';
 import type OpenAI from 'openai';
@@ -7,26 +6,8 @@ export const CLAUDE_SONNET_CURRENT_MODEL_ID = 'anthropic/claude-sonnet-4.6';
 
 export const CLAUDE_OPUS_CURRENT_MODEL_ID = 'anthropic/claude-opus-4.6';
 
-export const opus_46_free_slackbot_model = {
-  public_id: 'anthropic/claude-opus-4.6:slackbot',
-  display_name: 'Anthropic: Claude Opus 4.6 (Free for Kilo for Slack)',
-  description: 'Free version of Claude Opus 4.6 for use in Kilo for Slack only',
-  context_length: 1_000_000,
-  max_completion_tokens: 32000,
-  is_enabled: false,
-  flags: ['reasoning', 'prompt_cache', 'vision'],
-  gateway: 'openrouter',
-  internal_id: 'anthropic/claude-opus-4.6',
-  inference_providers: [],
-  slackbot_only: true,
-} as KiloFreeModel;
-
-const ENABLE_ANTHROPIC_STRICT_TOOL_USE = false;
-
 const ENABLE_ANTHROPIC_AUTOMATIC_CACHING = true;
 
-const ENABLE_ANTHROPIC_FINE_GRAINED_TOOL_STREAMING = true;
-
 export function isAnthropicModel(requestedModel: string) {
   return requestedModel.startsWith('anthropic/');
 }
@@ -35,62 +16,12 @@ export function isHaikuModel(requestedModel: string) {
   return requestedModel.startsWith('anthropic/claude-haiku');
 }
 
-export function isOpusModel(requestedModel: string) {
-  return requestedModel.startsWith('anthropic/claude-opus');
-}
-
-type ReadFileParametersSchema = {
-  properties?: {
-    files?: {
-      items?: {
-        properties?: { line_ranges?: { items?: { minItems?: number; maxItems?: number } } };
-      };
-    };
-  };
-};
-
-function patchReadFileTool(func: OpenAI.FunctionDefinition) {
-  try {
-    const lineRangesItems = (func.parameters as ReadFileParametersSchema | undefined)?.properties
-      ?.files?.items?.properties?.line_ranges?.items;
-    if (lineRangesItems) {
-      delete lineRangesItems.minItems;
-      delete lineRangesItems.maxItems;
-    }
-    func.strict = true;
-    return true;
-  } catch (e) {
-    console.error('[patchReadFileTool]', e);
-    return false;
-  }
-}
-
 function appendAnthropicBetaHeader(extraHeaders: Record<string, string>, betaFlag: string) {
   extraHeaders['x-anthropic-beta'] = [extraHeaders['x-anthropic-beta'], betaFlag]
     .filter(Boolean)
     .join(',');
 }
 
-function applyAnthropicStrictToolUse(
-  requestToMutate: OpenRouterChatCompletionRequest,
-  extraHeaders: Record<string, string>
-) {
-  let supportedToolFound = false;
-  for (const tool of requestToMutate.tools ?? []) {
-    if (tool.type === 'function') {
-      if (tool.function.name === 'read_file' && patchReadFileTool(tool.function)) {
-        supportedToolFound = true;
-      } else {
-        delete tool.function.strict;
-      }
-    }
-  }
-  if (supportedToolFound) {
-    console.debug('[applyAnthropicStrictToolUse] setting structured-outputs beta header');
-    appendAnthropicBetaHeader(extraHeaders, 'structured-outputs-2025-11-13');
-  }
-}
-
 function hasCacheControl(message: OpenAI.ChatCompletionMessageParam) {
   return (
     'cache_control' in message ||
@@ -163,16 +94,7 @@ export function applyAnthropicModelSettings(
   requestToMutate: OpenRouterChatCompletionRequest,
   extraHeaders: Record<string, string>
 ) {
-  if (ENABLE_ANTHROPIC_STRICT_TOOL_USE) {
-    applyAnthropicStrictToolUse(requestToMutate, extraHeaders);
-  }
-
-  if (ENABLE_ANTHROPIC_FINE_GRAINED_TOOL_STREAMING) {
-    console.debug(
-      '[applyAnthropicModelSettings] setting fine-grained-tool-streaming-2025-05-14 beta header'
-    );
-    appendAnthropicBetaHeader(extraHeaders, 'fine-grained-tool-streaming-2025-05-14');
-  }
+  appendAnthropicBetaHeader(extraHeaders, 'fine-grained-tool-streaming-2025-05-14');
 
   if (ENABLE_ANTHROPIC_AUTOMATIC_CACHING) {
     // kilo/auto doesn't get cache breakpoints, because clients don't know it's a Claude model
diff --git a/src/lib/providers/index.ts b/src/lib/providers/index.ts
index a01d0686d..c6a6156e8 100644
--- a/src/lib/providers/index.ts
+++ b/src/lib/providers/index.ts
@@ -14,7 +14,6 @@ import {
 import { applyXaiModelSettings, isXaiModel } from '@/lib/providers/xai';
 import { applyVercelSettings, shouldRouteToVercel } from '@/lib/providers/vercel';
 import { kiloFreeModels } from '@/lib/models';
-import { applyMinimaxProviderSettings } from '@/lib/providers/minimax';
 import {
   applyAnthropicModelSettings,
   isAnthropicModel,
@@ -71,13 +70,6 @@ export const PROVIDERS = {
     hasGenerationEndpoint: false,
     requiresResponseRewrite: true,
   },
-  INCEPTION: {
-    id: 'inception',
-    apiUrl: 'https://api.inceptionlabs.ai/v1',
-    apiKey: getEnvVariable('INCEPTION_API_KEY'),
-    hasGenerationEndpoint: false,
-    requiresResponseRewrite: false,
-  },
   MARTIAN: {
     id: 'martian',
     apiUrl: 'https://api.withmartian.com/v1',
@@ -92,20 +84,6 @@ export const PROVIDERS = {
     hasGenerationEndpoint: false,
     requiresResponseRewrite: false,
   },
-  MINIMAX: {
-    id: 'minimax',
-    apiUrl: 'https://api.minimax.io/v1',
-    apiKey: getEnvVariable('MINIMAX_API_KEY'),
-    hasGenerationEndpoint: false,
-    requiresResponseRewrite: false,
-  },
-  STREAMLAKE: {
-    id: 'streamlake',
-    apiUrl: 'https://vanchin.streamlake.ai/api/gateway/v1/endpoints',
-    apiKey: getEnvVariable('STREAMLAKE_API_KEY'),
-    hasGenerationEndpoint: false,
-    requiresResponseRewrite: false,
-  },
   VERCEL_AI_GATEWAY: {
     id: 'vercel',
     apiUrl: 'https://ai-gateway.vercel.sh/v1',
@@ -113,13 +91,6 @@ export const PROVIDERS = {
     hasGenerationEndpoint: true,
     requiresResponseRewrite: false,
   },
-  XAI: {
-    id: 'x-ai',
-    apiUrl: 'https://api.x.ai/v1',
-    apiKey: getEnvVariable('XAI_API_KEY'),
-    hasGenerationEndpoint: false,
-    requiresResponseRewrite: false,
-  },
 } as const satisfies Record<string, Provider>;
 
 export async function getProvider(
@@ -305,10 +276,6 @@ export function applyProviderSpecificLogic(
     applyCoreThinkProviderSettings(requestToMutate);
   }
 
-  if (provider.id === 'minimax') {
-    applyMinimaxProviderSettings(requestToMutate);
-  }
-
   if (provider.id === 'mistral') {
     applyMistralProviderSettings(requestToMutate, extraHeaders);
   } else if (isMistralModel(requestedModel)) {
diff --git a/src/lib/providers/kilo-free-model.ts b/src/lib/providers/kilo-free-model.ts
index 29c81f5b0..5baa3d03b 100644
--- a/src/lib/providers/kilo-free-model.ts
+++ b/src/lib/providers/kilo-free-model.ts
@@ -14,8 +14,6 @@ export type KiloFreeModel = {
   gateway: ProviderId;
   internal_id: string;
   inference_providers: OpenRouterInferenceProviderId[];
-  /** If true, this model is only available through Kilo for Slack (internalApiUse) and hidden from public model list */
-  slackbot_only?: boolean;
 };
 
 export function convertFromKiloModel(model: KiloFreeModel) {
diff --git a/src/lib/providers/minimax.ts b/src/lib/providers/minimax.ts
index abba4855d..906fee0dc 100644
--- a/src/lib/providers/minimax.ts
+++ b/src/lib/providers/minimax.ts
@@ -1,19 +1,4 @@
 import { type KiloFreeModel } from '@/lib/providers/kilo-free-model';
-import type { OpenRouterChatCompletionRequest } from '@/lib/providers/openrouter/types';
-
-export const minimax_m21_free_model = {
-  public_id: 'minimax/minimax-m2.1:free',
-  display_name: '[DEPRECATED, upgrade to M2.5] MiniMax M2.1 (free)',
-  description:
-    'MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world capability while maintaining exceptional latency, scalability, and cost efficiency.\n\nCompared to its predecessor, M2.1 delivers cleaner, more concise outputs and faster perceived response times. It shows leading multilingual coding performance across major systems and application languages, achieving 49.4% on Multi-SWE-Bench and 72.5% on SWE-Bench Multilingual, and serves as a versatile agent “brain” for IDEs, coding tools, and general-purpose assistance.',
-  context_length: 204800,
-  max_completion_tokens: 131072,
-  is_enabled: false,
-  flags: ['reasoning', 'prompt_cache'],
-  gateway: 'openrouter',
-  internal_id: 'minimax/minimax-m2.1',
-  inference_providers: ['minimax'],
-} as KiloFreeModel;
 
 export const minimax_m25_free_model = {
   public_id: 'minimax/minimax-m2.5:free',
@@ -28,7 +13,3 @@ export const minimax_m25_free_model = {
   internal_id: 'minimax/minimax-m2.5',
   inference_providers: ['minimax'],
 } as KiloFreeModel;
-
-export function applyMinimaxProviderSettings(requestToMutate: OpenRouterChatCompletionRequest) {
-  requestToMutate.reasoning_split = true;
-}
diff --git a/src/lib/providers/provider-id.ts b/src/lib/providers/provider-id.ts
index aa02c0830..3341dcfdc 100644
--- a/src/lib/providers/provider-id.ts
+++ b/src/lib/providers/provider-id.ts
@@ -2,15 +2,8 @@ export type ProviderId =
   | 'openrouter'
   | 'gigapotato'
   | 'corethink'
-  | 'inception'
   | 'martian'
-  | 'minimax'
   | 'mistral'
-  | 'novita'
-  | 'genlabs'
-  | 'streamlake'
-  | 'x-ai'
   | 'vercel'
-  | 'anthropic'
   | 'custom'
   | 'dev-tools';
diff --git a/src/lib/providers/vercel.ts b/src/lib/providers/vercel.ts
index 2798cd0c5..664ac8551 100644
--- a/src/lib/providers/vercel.ts
+++ b/src/lib/providers/vercel.ts
@@ -1,8 +1,8 @@
 import type { BYOKResult } from '@/lib/byok';
 import { kiloFreeModels } from '@/lib/models';
-import { isAnthropicModel, isOpusModel } from '@/lib/providers/anthropic';
+import { isAnthropicModel } from '@/lib/providers/anthropic';
 import { getGatewayErrorRate } from '@/lib/providers/gateway-error-rate';
-import { minimax_m21_free_model, minimax_m25_free_model } from '@/lib/providers/minimax';
+import { minimax_m25_free_model } from '@/lib/providers/minimax';
 import {
   AutocompleteUserByokProviderIdSchema,
   inferVercelFirstPartyInferenceProviderForModel,
@@ -15,7 +15,7 @@ import type {
   VercelInferenceProviderConfig,
   VercelProviderConfig,
 } from '@/lib/providers/openrouter/types';
-import { zai_glm47_free_model, zai_glm5_free_model } from '@/lib/providers/zai';
+import { zai_glm5_free_model } from '@/lib/providers/zai';
 import * as crypto from 'crypto';
 
 // EMERGENCY SWITCH
@@ -28,14 +28,12 @@ const VERCEL_ROUTING_ALLOW_LIST = [
   'arcee-ai/trinity-large-preview:free',
   'google/gemini-3-pro-preview',
   'google/gemini-3-flash-preview',
-  minimax_m21_free_model.public_id,
   'minimax/minimax-m2.1',
   minimax_m25_free_model.public_id,
   'minimax/minimax-m2.5',
   'openai/gpt-5.2',
   'openai/gpt-5.2-codex',
   'x-ai/grok-code-fast-1',
-  zai_glm47_free_model.public_id,
   'z-ai/glm-4.7',
   zai_glm5_free_model.public_id,
   'z-ai/glm-5',
@@ -170,7 +168,7 @@ export function applyVercelSettings(
     requestToMutate.providerOptions = convertProviderOptions(requestToMutate.provider);
   }
 
-  if (isOpusModel(requestedModel) && requestToMutate.providerOptions && requestToMutate.verbosity) {
+  if (requestToMutate.providerOptions && requestToMutate.verbosity) {
     requestToMutate.providerOptions.anthropic = {
       effort: requestToMutate.verbosity,
     };
diff --git a/src/lib/providers/zai.ts b/src/lib/providers/zai.ts
index 5bc00b40a..8dbb0f615 100644
--- a/src/lib/providers/zai.ts
+++ b/src/lib/providers/zai.ts
@@ -1,19 +1,5 @@
 import { type KiloFreeModel } from '@/lib/providers/kilo-free-model';
 
-export const zai_glm47_free_model = {
-  public_id: 'z-ai/glm-4.7:free',
-  display_name: 'Z.AI: GLM 4.7 (free)',
-  description:
-    "GLM-4.7 is Z.AI's latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.",
-  context_length: 202752,
-  max_completion_tokens: 65535,
-  is_enabled: false,
-  flags: ['reasoning', 'prompt_cache'],
-  gateway: 'openrouter',
-  internal_id: 'z-ai/glm-4.7',
-  inference_providers: ['novita'],
-} as KiloFreeModel;
-
 export const zai_glm5_free_model = {
   public_id: 'z-ai/glm-5:free',
   display_name: 'Z.ai: GLM 5 (free)',