From 63ada1ef952d05b81da148c4bf2e7a4a5afcd10d Mon Sep 17 00:00:00 2001 From: Revant Patel Date: Tue, 2 Jun 2026 16:09:32 -0700 Subject: [PATCH] remove openai request guard --- QUICKSTART.md | 8 ++++---- README.md | 8 ++++---- config.example.json | 12 ++++++------ scripts/claudia-claude.mjs | 4 ++-- scripts/presets.mjs | 6 +++--- scripts/providers.mjs | 2 +- scripts/release-smoke.mjs | 14 ++++++++++++++ tests/claudia-config.test.ts | 10 ++++++++-- tests/profile.test.ts | 6 +++--- tests/setup.test.ts | 11 +++++++++++ tests/status.test.ts | 2 +- 11 files changed, 57 insertions(+), 26 deletions(-) diff --git a/QUICKSTART.md b/QUICKSTART.md index 21327ee..4044773 100644 --- a/QUICKSTART.md +++ b/QUICKSTART.md @@ -44,7 +44,7 @@ If your NVIDIA key changes later, run `npm run key`. ## Advanced: NVIDIA NIM (Recommended for Quality) -NVIDIA hosted models like `z-ai/glm-5.1`, `z-ai/glm4.7`, and `qwen/qwen3.5-122b-a10b`. +NVIDIA hosted models like `nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`, `z-ai/glm4.7`, and `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16`. From the cloned repo root: ```sh @@ -130,9 +130,9 @@ claudia-claude --model local-model | `npm run release:check` | Release gate: typecheck + tests + build + package smoke | | `npm run config` | Re-run the configuration wizard | | `claudia-claude` | Launch Claude Code connected to the router | -| `npm run claude:fast` | Default long-context model (z-ai/glm-5.1) | +| `npm run claude:fast` | Default long-context model (nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16) | | `npm run claude:glm` | High-quality thinking model, slower on purpose (z-ai/glm4.7) | -| `npm run claude:qwen` | Backup coding model, less consistent on complex code (qwen/qwen3.5-122b-a10b) | +| `npm run claude:qwen` | Backup coding model, less consistent on complex code (nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16) | | `npm run claude:smoke` | Quick smoke test only (nemotron-mini-4b) | --- @@ -162,7 +162,7 @@ curl http://localhost:8082/v1/messages \ -H "x-api-key: dummy" \ -d '{ "model": "claude-3-5-sonnet-latest", - "max_tokens": 100, + "max_tokens": 4096, "messages": [{"role": "user", "content": "Say hello"}] }' ``` diff --git a/README.md b/README.md index d7fa000..b50a6fb 100644 --- a/README.md +++ b/README.md @@ -125,13 +125,13 @@ npm run claude:fast -- --managed-auth If you see a managed-login warning, remove `--managed-auth`. Claude managed credentials are sent only to the local router; your NVIDIA key is sent to NVIDIA by the router. -The fast script and default wrapper route `claude-3-5-sonnet-latest` to NVIDIA `z-ai/glm-5.1`. Use `npm run claude:glm` for the slower thinking-heavy GLM quality profile, `npm run claude:qwen` for the Qwen fallback, or `npm run claude:smoke` to test routing with the smallest configured model. +The fast script and default wrapper route `claude-3-5-sonnet-latest` to NVIDIA `nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`. Use `npm run claude:glm` for the slower thinking-heavy GLM quality profile, `npm run claude:qwen` for the Nano fallback, or `npm run claude:smoke` to test routing with the smallest configured model. Model tradeoffs: - `fast`: best default for long prompts and coding; slower than smaller models, but much less likely to hit context limits - `glm`: stronger on hard tasks when it reasons longer, but slower -- `qwen`: backup option when you want a different model family, but less consistent on complex code +- `qwen`: backup option when you want a lighter fallback, but less consistent on complex code - `smoke`: smallest and quickest option for health checks, not real work ### Check the router @@ -150,7 +150,7 @@ curl http://localhost:8082/v1/messages \ -H "x-api-key: dummy" \ -d '{ "model": "claude-3-5-sonnet-latest", - "max_tokens": 512, + "max_tokens": 4096, "messages": [ { "role": "user", @@ -176,7 +176,7 @@ LOG_LEVEL=info 2. Keep `defaultBackend` set to `nvidia` in `config.json`. -3. Use a mapped Claude-style model alias such as `claude-3-5-sonnet-latest`, or send any model name and Claudia Router will use the NVIDIA backend default model (`z-ai/glm-5.1`). +3. Use a mapped Claude-style model alias such as `claude-3-5-sonnet-latest`, or send any model name and Claudia Router will use the NVIDIA backend default model (`nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`). If you want to switch providers later, use `npm run init -- --provider openrouter` or `npm run init -- --provider local`. Use `npm run config` if you prefer the interactive provider picker. diff --git a/config.example.json b/config.example.json index 382691a..995659f 100644 --- a/config.example.json +++ b/config.example.json @@ -5,7 +5,7 @@ "nvidia": { "baseUrl": "https://integrate.api.nvidia.com/v1", "apiKeyEnv": "NVIDIA_API_KEY", - "defaultModel": "z-ai/glm-5.1" + "defaultModel": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" }, "openrouter": { "baseUrl": "https://openrouter.ai/api/v1", @@ -21,10 +21,10 @@ "modelProfiles": { "claude-3-5-sonnet-latest": { "backend": "nvidia", - "providerModel": "z-ai/glm-5.1", + "providerModel": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", "retryAttempts": 3, "retryBaseDelayMs": 500, - "notes": "Default long-context NVIDIA coding profile; better for big prompts, slightly slower than smaller models", + "notes": "Default long-context NVIDIA coding profile; stronger context window, slightly slower than smaller models", "capabilities": { "toolCalls": true, "coding": true @@ -66,10 +66,10 @@ }, "claude-3-5-sonnet-qwen": { "backend": "nvidia", - "providerModel": "qwen/qwen3.5-122b-a10b", + "providerModel": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "retryAttempts": 3, "retryBaseDelayMs": 500, - "notes": "Qwen fallback NVIDIA coding profile; useful as a backup, but less consistent on complex code", + "notes": "Nano fallback NVIDIA coding profile; useful as a backup, but lighter than the default", "capabilities": { "toolCalls": true, "coding": true @@ -90,7 +90,7 @@ "modelMap": { "legacy-claude-3-5-sonnet-latest": { "backend": "nvidia", - "model": "z-ai/glm-5.1" + "model": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" } } } diff --git a/scripts/claudia-claude.mjs b/scripts/claudia-claude.mjs index 34eac95..feedbf0 100755 --- a/scripts/claudia-claude.mjs +++ b/scripts/claudia-claude.mjs @@ -91,9 +91,9 @@ Claudia Router Model Profiles Shortcuts (use with --model or in npm scripts): - --model fast Default: z-ai/glm-5.1 (NVIDIA) — best long-context option, a bit slower + --model fast Default: nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16 (NVIDIA) — best long-context option, a bit slower --model glm Thinking-heavy: z-ai/glm4.7 (NVIDIA) — slower, but better on hard tasks - --model qwen Fallback: qwen/qwen3.5-122b-a10b (NVIDIA) — useful fallback, less consistent + --model qwen Fallback: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 (NVIDIA) — useful fallback, less consistent --model smoke Lightweight: nvidia/nemotron-mini-4b-instruct (NVIDIA) — for quick checks only Built-in npm scripts: diff --git a/scripts/presets.mjs b/scripts/presets.mjs index b92f4ea..9a1ec19 100644 --- a/scripts/presets.mjs +++ b/scripts/presets.mjs @@ -6,7 +6,7 @@ export const PROFILE_PRESETS = { model: "claude-3-5-sonnet-latest", description: "Default long-context preset", nextCommand: "npm run claude:fast", - notes: "Default GLM-5.1 routing profile; strongest context window, but slower than smaller models" + notes: "Default NVIDIA Nemotron Super routing profile; strongest context window, but slower than smaller models" }, glm: { alias: "glm", @@ -20,7 +20,7 @@ export const PROFILE_PRESETS = { model: "claude-3-5-sonnet-qwen", description: "Fallback preset", nextCommand: "npm run claude:qwen", - notes: "Useful when GLM is unavailable, but less consistent on complex code" + notes: "Fallback NVIDIA Nemotron Nano routing profile; useful when the larger default is too heavy" }, smoke: { alias: "smoke", @@ -111,7 +111,7 @@ export function buildProfileModelProfiles(providerKey, provider) { modelProfiles[PROFILE_PRESETS.qwen.model] = { backend: providerKey, - providerModel: "qwen/qwen3.5-122b-a10b", + providerModel: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", retryAttempts: 3, retryBaseDelayMs: 500, notes: PROFILE_PRESETS.qwen.notes diff --git a/scripts/providers.mjs b/scripts/providers.mjs index c55bd68..f4ba389 100644 --- a/scripts/providers.mjs +++ b/scripts/providers.mjs @@ -4,7 +4,7 @@ export const PROVIDERS = { name: "NVIDIA NIM", baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", - defaultModel: "z-ai/glm-5.1", + defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", smokeModel: "nvidia/nemotron-mini-4b-instruct", requiresKey: true, description: "Long-context and coding-capable models hosted by NVIDIA" diff --git a/scripts/release-smoke.mjs b/scripts/release-smoke.mjs index 4fdf5e0..9159656 100644 --- a/scripts/release-smoke.mjs +++ b/scripts/release-smoke.mjs @@ -144,6 +144,20 @@ function main() { const nvidiaConfig = readJson(configPath); assert(nvidiaConfig.defaultBackend === "nvidia", `Expected defaultBackend=nvidia, got ${nvidiaConfig.defaultBackend}`); + assert( + nvidiaConfig.backends?.nvidia?.defaultModel === "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", + `Expected NVIDIA defaultModel to use the Nemotron Super model, got ${nvidiaConfig.backends?.nvidia?.defaultModel}` + ); + assert( + nvidiaConfig.modelProfiles?.["claude-3-5-sonnet-latest"]?.providerModel === + "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", + "Expected fast profile to use the Nemotron Super model" + ); + assert( + nvidiaConfig.modelProfiles?.["claude-3-5-sonnet-qwen"]?.providerModel === + "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", + "Expected qwen fallback profile to use the Nemotron Nano model" + ); const nvidiaEnvFile = fs.readFileSync(envPath, "utf8"); assert(nvidiaEnvFile.includes("NVIDIA_API_KEY=nvidia-test-key"), ".env did not persist NVIDIA_API_KEY"); diff --git a/tests/claudia-config.test.ts b/tests/claudia-config.test.ts index 4ba8453..8ca746d 100644 --- a/tests/claudia-config.test.ts +++ b/tests/claudia-config.test.ts @@ -71,7 +71,13 @@ test("configuration wizard awaits remote connectivity before completion", async assert.match(logs.join("\n"), /OK Connected to nvidia successfully/); assert.match(logs.join("\n"), /Configuration complete!/); const config = JSON.parse(fs.readFileSync(path.join(cwd, "config.json"), "utf8")); - assert.equal(config.modelProfiles["claude-3-5-sonnet-latest"]?.providerModel, "z-ai/glm-5.1"); + assert.equal( + config.modelProfiles["claude-3-5-sonnet-latest"]?.providerModel, + "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + ); assert.equal(config.modelProfiles["claude-3-5-sonnet-glm"]?.providerModel, "z-ai/glm4.7"); - assert.equal(config.modelProfiles["claude-3-5-sonnet-qwen"]?.providerModel, "qwen/qwen3.5-122b-a10b"); + assert.equal( + config.modelProfiles["claude-3-5-sonnet-qwen"]?.providerModel, + "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16" + ); }); diff --git a/tests/profile.test.ts b/tests/profile.test.ts index f21521a..e6af830 100644 --- a/tests/profile.test.ts +++ b/tests/profile.test.ts @@ -23,14 +23,14 @@ function writeNvidiaConfig(cwd: string): void { nvidia: { baseUrl: "https://integrate.api.nvidia.com/v1", apiKeyEnv: "NVIDIA_API_KEY", - defaultModel: "z-ai/glm-5.1" + defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" } }, modelMap: {}, modelProfiles: { "claude-3-5-sonnet-latest": { backend: "nvidia", - providerModel: "z-ai/glm-5.1" + providerModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" }, "claude-3-5-sonnet-glm": { backend: "nvidia", @@ -38,7 +38,7 @@ function writeNvidiaConfig(cwd: string): void { }, "claude-3-5-sonnet-qwen": { backend: "nvidia", - providerModel: "qwen/qwen3.5-122b-a10b" + providerModel: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16" }, "claude-3-haiku-latest": { backend: "nvidia", diff --git a/tests/setup.test.ts b/tests/setup.test.ts index 4a118be..8ee72f6 100644 --- a/tests/setup.test.ts +++ b/tests/setup.test.ts @@ -54,6 +54,17 @@ test("creates setup files, prompts for a missing key, and runs the NVIDIA smoke assert.equal(authorization, "Bearer secret-test-key"); assert.equal(requestBody?.model, "nvidia/nemotron-mini-4b-instruct"); assert.equal(requestBody?.stream, false); + const generatedConfig = JSON.parse(fs.readFileSync(path.join(cwd, "config.json"), "utf8")) as { + modelProfiles: Record; + }; + assert.equal( + generatedConfig.modelProfiles["claude-3-5-sonnet-latest"]?.providerModel, + "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" + ); + assert.equal( + generatedConfig.modelProfiles["claude-3-5-sonnet-qwen"]?.providerModel, + "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16" + ); assert.doesNotMatch(result.output, /secret-test-key/); assert.match(result.output, /Configuration complete!/); }); diff --git a/tests/status.test.ts b/tests/status.test.ts index 432e6bc..d2b7909 100644 --- a/tests/status.test.ts +++ b/tests/status.test.ts @@ -22,7 +22,7 @@ function createStatusDirectory(env = "NVIDIA_API_KEY=test-key\nCLAUDIA_CLAUDE_MO nvidia: { baseUrl: "https://example.invalid/v1", apiKeyEnv: "NVIDIA_API_KEY", - defaultModel: "z-ai/glm-5.1" + defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16" } }, modelMap: {},