From 244a0dd994f396af52bfb3083da798d0c11160a4 Mon Sep 17 00:00:00 2001 From: Revant Patel Date: Tue, 2 Jun 2026 17:23:56 -0700 Subject: [PATCH] Fix NVIDIA hosted model slugs --- QUICKSTART.md | 6 +++--- README.md | 4 ++-- config.example.json | 22 ++++++++++++++++++---- scripts/claudia-claude.mjs | 4 ++-- scripts/presets.mjs | 18 +++++++++++++++++- scripts/providers.mjs | 2 +- scripts/release-smoke.mjs | 6 +++--- tests/claudia-config.test.ts | 4 ++-- tests/profile.test.ts | 6 +++--- tests/setup.test.ts | 4 ++-- tests/status.test.ts | 2 +- 11 files changed, 54 insertions(+), 24 deletions(-) diff --git a/QUICKSTART.md b/QUICKSTART.md index 4044773..bb75b3d 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -44,7 +44,7 @@ If your NVIDIA key changes later, run `npm run key`. ## Advanced: NVIDIA NIM (Recommended for Quality) -NVIDIA hosted models like `nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`, `z-ai/glm4.7`, and `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16`. +NVIDIA hosted models like `nvidia/nemotron-3-super-120b-a12b`, `z-ai/glm4.7`, and `nvidia/nemotron-3-nano-30b-a3b`. From the cloned repo root: ```sh @@ -130,9 +130,9 @@ claudia-claude --model local-model | `npm run release:check` | Release gate: typecheck + tests + build + package smoke | | `npm run config` | Re-run the configuration wizard | | `claudia-claude` | Launch Claude Code connected to the router | -| `npm run claude:fast` | Default long-context model (nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16) | +| `npm run claude:fast` | Default long-context model (nvidia/nemotron-3-super-120b-a12b) | | `npm run claude:glm` | High-quality thinking model, slower on purpose (z-ai/glm4.7) | -| `npm run claude:qwen` | Backup coding model, less consistent on complex code (nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16) | +| `npm run claude:qwen` | Backup coding model, less consistent on complex code (nvidia/nemotron-3-nano-30b-a3b) | | `npm run claude:smoke` | Quick smoke test only (nemotron-mini-4b) | --- diff --git a/README.md b/README.md index b50a6fb..bc80b50 100644 --- a/README.md +++ b/README.md @@ -125,7 +125,7 @@ npm run claude:fast -- --managed-auth If you see a managed-login warning, remove `--managed-auth`. Claude managed credentials are sent only to the local router; your NVIDIA key is sent to NVIDIA by the router. -The fast script and default wrapper route `claude-3-5-sonnet-latest` to NVIDIA `nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`. Use `npm run claude:glm` for the slower thinking-heavy GLM quality profile, `npm run claude:qwen` for the Nano fallback, or `npm run claude:smoke` to test routing with the smallest configured model. +The fast script and default wrapper route `claude-3-5-sonnet-latest` to NVIDIA `nvidia/nemotron-3-super-120b-a12b`. Use `npm run claude:glm` for the slower thinking-heavy GLM quality profile, `npm run claude:qwen` for the Nano fallback, or `npm run claude:smoke` to test routing with the smallest configured model. Model tradeoffs: @@ -176,7 +176,7 @@ LOG_LEVEL=info 2. Keep `defaultBackend` set to `nvidia` in `config.json`. -3. Use a mapped Claude-style model alias such as `claude-3-5-sonnet-latest`, or send any model name and Claudia Router will use the NVIDIA backend default model (`nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`). +3. Use a mapped Claude-style model alias such as `claude-3-5-sonnet-latest`, or send any model name and Claudia Router will use the NVIDIA backend default model (`nvidia/nemotron-3-super-120b-a12b`). If you want to switch providers later, use `npm run init -- --provider openrouter` or `npm run init -- --provider local`. Use `npm run config` if you prefer the interactive provider picker. diff --git a/config.example.json b/config.example.json index 995659f..d2a4f1d 100644 --- a/config.example.json +++ b/config.example.json @@ -5,7 +5,7 @@ "nvidia": { "baseUrl": "https://integrate.api.nvidia.com/v1", "apiKeyEnv": "NVIDIA_API_KEY", - "defaultModel": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + "defaultModel": "nvidia/nemotron-3-super-120b-a12b" }, "openrouter": { "baseUrl": "https://openrouter.ai/api/v1", @@ -21,9 +21,17 @@ "modelProfiles": { "claude-3-5-sonnet-latest": { "backend": "nvidia", - "providerModel": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", + "providerModel": "nvidia/nemotron-3-super-120b-a12b", "retryAttempts": 3, "retryBaseDelayMs": 500, + "extraBody": { + "chat_template_kwargs": { + "enable_thinking": false, + "force_nonempty_content": true + }, + "temperature": 1, + "top_p": 0.95 + }, "notes": "Default long-context NVIDIA coding profile; stronger context window, slightly slower than smaller models", "capabilities": { "toolCalls": true, @@ -66,9 +74,15 @@ }, "claude-3-5-sonnet-qwen": { "backend": "nvidia", - "providerModel": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "providerModel": "nvidia/nemotron-3-nano-30b-a3b", "retryAttempts": 3, "retryBaseDelayMs": 500, + "extraBody": { + "chat_template_kwargs": { + "enable_thinking": false, + "force_nonempty_content": true + } + }, "notes": "Nano fallback NVIDIA coding profile; useful as a backup, but lighter than the default", "capabilities": { "toolCalls": true, @@ -90,7 +104,7 @@ "modelMap": { "legacy-claude-3-5-sonnet-latest": { "backend": "nvidia", - "model": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + "model": "nvidia/nemotron-3-super-120b-a12b" } } } diff --git a/scripts/claudia-claude.mjs b/scripts/claudia-claude.mjs index feedbf0..3a71f91 100755 --- a/scripts/claudia-claude.mjs +++ b/scripts/claudia-claude.mjs @@ -91,9 +91,9 @@ Claudia Router Model Profiles Shortcuts (use with --model or in npm scripts): - --model fast Default: nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16 (NVIDIA) — best long-context option, a bit slower + --model fast Default: nvidia/nemotron-3-super-120b-a12b (NVIDIA) — best long-context option, a bit slower --model glm Thinking-heavy: z-ai/glm4.7 (NVIDIA) — slower, but better on hard tasks - --model qwen Fallback: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 (NVIDIA) — useful fallback, less consistent + --model qwen Fallback: nvidia/nemotron-3-nano-30b-a3b (NVIDIA) — useful fallback, less consistent --model smoke Lightweight: nvidia/nemotron-mini-4b-instruct (NVIDIA) — for quick checks only Built-in npm scripts: diff --git a/scripts/presets.mjs b/scripts/presets.mjs index 9a1ec19..4f7c37b 100644 --- a/scripts/presets.mjs +++ b/scripts/presets.mjs @@ -83,6 +83,16 @@ export function buildProfileModelProfiles(providerKey, provider) { providerModel: provider.defaultModel, retryAttempts: 3, retryBaseDelayMs: 500, + extraBody: providerKey === "nvidia" + ? { + chat_template_kwargs: { + enable_thinking: false, + force_nonempty_content: true + }, + temperature: 1.0, + top_p: 0.95 + } + : undefined, notes: PROFILE_PRESETS.fast.notes }, [PROFILE_PRESETS.smoke.model]: { @@ -111,9 +121,15 @@ export function buildProfileModelProfiles(providerKey, provider) { modelProfiles[PROFILE_PRESETS.qwen.model] = { backend: providerKey, - providerModel: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + providerModel: "nvidia/nemotron-3-nano-30b-a3b", retryAttempts: 3, retryBaseDelayMs: 500, + extraBody: { + chat_template_kwargs: { + enable_thinking: false, + force_nonempty_content: true + } + }, notes: PROFILE_PRESETS.qwen.notes }; } diff --git a/scripts/providers.mjs b/scripts/providers.mjs index f4ba389..300f8a9 100644 --- a/scripts/providers.mjs +++ b/scripts/providers.mjs @@ -4,7 +4,7 @@ export const PROVIDERS = { name: "NVIDIA NIM", baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", - defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", + defaultModel: "nvidia/nemotron-3-super-120b-a12b", smokeModel: "nvidia/nemotron-mini-4b-instruct", requiresKey: true, description: "Long-context and coding-capable models hosted by NVIDIA" diff --git a/scripts/release-smoke.mjs b/scripts/release-smoke.mjs index 9159656..d2451ce 100644 --- a/scripts/release-smoke.mjs +++ b/scripts/release-smoke.mjs @@ -145,17 +145,17 @@ function main() { const nvidiaConfig = readJson(configPath); assert(nvidiaConfig.defaultBackend === "nvidia", `Expected defaultBackend=nvidia, got ${nvidiaConfig.defaultBackend}`); assert( - nvidiaConfig.backends?.nvidia?.defaultModel === "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", + nvidiaConfig.backends?.nvidia?.defaultModel === "nvidia/nemotron-3-super-120b-a12b", `Expected NVIDIA defaultModel to use the Nemotron Super model, got ${nvidiaConfig.backends?.nvidia?.defaultModel}` ); assert( nvidiaConfig.modelProfiles?.["claude-3-5-sonnet-latest"]?.providerModel === - "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", + "nvidia/nemotron-3-super-120b-a12b", "Expected fast profile to use the Nemotron Super model" ); assert( nvidiaConfig.modelProfiles?.["claude-3-5-sonnet-qwen"]?.providerModel === - "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "nvidia/nemotron-3-nano-30b-a3b", "Expected qwen fallback profile to use the Nemotron Nano model" ); const nvidiaEnvFile = fs.readFileSync(envPath, "utf8"); diff --git a/tests/claudia-config.test.ts b/tests/claudia-config.test.ts index 8ca746d..db1a4f9 100644 --- a/tests/claudia-config.test.ts +++ b/tests/claudia-config.test.ts @@ -73,11 +73,11 @@ test("configuration wizard awaits remote connectivity before completion", async const config = JSON.parse(fs.readFileSync(path.join(cwd, "config.json"), "utf8")); assert.equal( config.modelProfiles["claude-3-5-sonnet-latest"]?.providerModel, - "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + "nvidia/nemotron-3-super-120b-a12b" ); assert.equal(config.modelProfiles["claude-3-5-sonnet-glm"]?.providerModel, "z-ai/glm4.7"); assert.equal( config.modelProfiles["claude-3-5-sonnet-qwen"]?.providerModel, - "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16" + "nvidia/nemotron-3-nano-30b-a3b" ); }); diff --git a/tests/profile.test.ts b/tests/profile.test.ts index e6af830..889ab94 100644 --- a/tests/profile.test.ts +++ b/tests/profile.test.ts @@ -23,14 +23,14 @@ function writeNvidiaConfig(cwd: string): void { nvidia: { baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", - defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + defaultModel: "nvidia/nemotron-3-super-120b-a12b" } }, modelMap: {}, modelProfiles: { "claude-3-5-sonnet-latest": { backend: "nvidia", - providerModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + providerModel: "nvidia/nemotron-3-super-120b-a12b" }, "claude-3-5-sonnet-glm": { backend: "nvidia", @@ -38,7 +38,7 @@ function writeNvidiaConfig(cwd: string): void { }, "claude-3-5-sonnet-qwen": { backend: "nvidia", - providerModel: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16" + providerModel: "nvidia/nemotron-3-nano-30b-a3b" }, "claude-3-haiku-latest": { backend: "nvidia", diff --git a/tests/setup.test.ts b/tests/setup.test.ts index 8ee72f6..7746ac3 100644 --- a/tests/setup.test.ts +++ b/tests/setup.test.ts @@ -59,11 +59,11 @@ test("creates setup files, prompts for a missing key, and runs the NVIDIA smoke }; assert.equal( generatedConfig.modelProfiles["claude-3-5-sonnet-latest"]?.providerModel, - "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + "nvidia/nemotron-3-super-120b-a12b" ); assert.equal( generatedConfig.modelProfiles["claude-3-5-sonnet-qwen"]?.providerModel, - "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16" + "nvidia/nemotron-3-nano-30b-a3b" ); assert.doesNotMatch(result.output, /secret-test-key/); assert.match(result.output, /Configuration complete!/); diff --git a/tests/status.test.ts b/tests/status.test.ts index d2b7909..2ed2681 100644 --- a/tests/status.test.ts +++ b/tests/status.test.ts @@ -22,7 +22,7 @@ function createStatusDirectory(env = "NVIDIA_API_KEY=test-key\nCLAUDIA_CLAUDE_MO nvidia: { baseUrl: "https://example.invalid/v1", apiKeyEnv: "NVIDIA_API_KEY", - defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + defaultModel: "nvidia/nemotron-3-super-120b-a12b" } }, modelMap: {},