From 63ada1ef952d05b81da148c4bf2e7a4a5afcd10d Mon Sep 17 00:00:00 2001
From: Revant Patel <revant.h.patel@gmail.com>
Date: Tue, 2 Jun 2026 16:09:32 -0700
Subject: [PATCH] switch NVIDIA default and fallback models to Nemotron Super/Nano

---
 QUICKSTART.md                |  8 ++++----
 README.md                    |  8 ++++----
 config.example.json          | 12 ++++++------
 scripts/claudia-claude.mjs   |  4 ++--
 scripts/presets.mjs          |  6 +++---
 scripts/providers.mjs        |  2 +-
 scripts/release-smoke.mjs    | 14 ++++++++++++++
 tests/claudia-config.test.ts | 10 ++++++++--
 tests/profile.test.ts        |  6 +++---
 tests/setup.test.ts          | 11 +++++++++++
 tests/status.test.ts         |  2 +-
 11 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/QUICKSTART.md b/QUICKSTART.md
index 21327ee..4044773 100644
--- a/QUICKSTART.md
+++ b/QUICKSTART.md
@@ -44,7 +44,7 @@ If your NVIDIA key changes later, run `npm run key`.
 
 ## Advanced: NVIDIA NIM (Recommended for Quality)
 
-NVIDIA hosted models like `z-ai/glm-5.1`, `z-ai/glm4.7`, and `qwen/qwen3.5-122b-a10b`.
+NVIDIA hosts models such as `nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`, `z-ai/glm4.7`, and `nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16`.
 From the cloned repo root:
 
 ```sh
@@ -130,9 +130,9 @@ claudia-claude --model local-model
 | `npm run release:check` | Release gate: typecheck + tests + build + package smoke |
 | `npm run config` | Re-run the configuration wizard |
 | `claudia-claude` | Launch Claude Code connected to the router |
-| `npm run claude:fast` | Default long-context model (z-ai/glm-5.1) |
+| `npm run claude:fast` | Default long-context model (nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16) |
 | `npm run claude:glm` | High-quality thinking model, slower on purpose (z-ai/glm4.7) |
-| `npm run claude:qwen` | Backup coding model, less consistent on complex code (qwen/qwen3.5-122b-a10b) |
+| `npm run claude:qwen` | Backup coding model, less consistent on complex code (nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16) |
 | `npm run claude:smoke` | Quick smoke test only (nemotron-mini-4b) |
 
 ---
@@ -162,7 +162,7 @@ curl http://localhost:8082/v1/messages \
   -H "x-api-key: dummy" \
   -d '{
     "model": "claude-3-5-sonnet-latest",
-    "max_tokens": 100,
+    "max_tokens": 4096,
     "messages": [{"role": "user", "content": "Say hello"}]
   }'
 ```
diff --git a/README.md b/README.md
index d7fa000..b50a6fb 100644
--- a/README.md
+++ b/README.md
@@ -125,13 +125,13 @@ npm run claude:fast -- --managed-auth
 
 If you see a managed-login warning, remove `--managed-auth`. Claude managed credentials are sent only to the local router; your NVIDIA key is sent to NVIDIA by the router.
 
-The fast script and default wrapper route `claude-3-5-sonnet-latest` to NVIDIA `z-ai/glm-5.1`. Use `npm run claude:glm` for the slower thinking-heavy GLM quality profile, `npm run claude:qwen` for the Qwen fallback, or `npm run claude:smoke` to test routing with the smallest configured model.
+The fast script and default wrapper route `claude-3-5-sonnet-latest` to NVIDIA `nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`. Use `npm run claude:glm` for the slower thinking-heavy GLM quality profile, `npm run claude:qwen` for the Nano fallback, or `npm run claude:smoke` to test routing with the smallest configured model.
 
 Model tradeoffs:
 
 - `fast`: best default for long prompts and coding; slower than smaller models, but much less likely to hit context limits
 - `glm`: stronger on hard tasks when it reasons longer, but slower
-- `qwen`: backup option when you want a different model family, but less consistent on complex code
+- `qwen`: lighter backup option (the alias now routes to Nemotron Nano rather than a Qwen model), but less consistent on complex code
 - `smoke`: smallest and quickest option for health checks, not real work
 
 ### Check the router
@@ -150,7 +150,7 @@ curl http://localhost:8082/v1/messages \
   -H "x-api-key: dummy" \
   -d '{
     "model": "claude-3-5-sonnet-latest",
-    "max_tokens": 512,
+    "max_tokens": 4096,
     "messages": [
       {
         "role": "user",
@@ -176,7 +176,7 @@ LOG_LEVEL=info
 
 2. Keep `defaultBackend` set to `nvidia` in `config.json`.
 
-3. Use a mapped Claude-style model alias such as `claude-3-5-sonnet-latest`, or send any model name and Claudia Router will use the NVIDIA backend default model (`z-ai/glm-5.1`).
+3. Use a mapped Claude-style model alias such as `claude-3-5-sonnet-latest`, or send any model name and Claudia Router will use the NVIDIA backend default model (`nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16`).
 
 If you want to switch providers later, use `npm run init -- --provider openrouter` or `npm run init -- --provider local`. Use `npm run config` if you prefer the interactive provider picker.
 
diff --git a/config.example.json b/config.example.json
index 382691a..995659f 100644
--- a/config.example.json
+++ b/config.example.json
@@ -5,7 +5,7 @@
     "nvidia": {
       "baseUrl": "https://integrate.api.nvidia.com/v1",
       "apiKeyEnv": "NVIDIA_API_KEY",
-      "defaultModel": "z-ai/glm-5.1"
+      "defaultModel": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
     },
     "openrouter": {
       "baseUrl": "https://openrouter.ai/api/v1",
@@ -21,10 +21,10 @@
   "modelProfiles": {
     "claude-3-5-sonnet-latest": {
       "backend": "nvidia",
-      "providerModel": "z-ai/glm-5.1",
+      "providerModel": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
       "retryAttempts": 3,
       "retryBaseDelayMs": 500,
-      "notes": "Default long-context NVIDIA coding profile; better for big prompts, slightly slower than smaller models",
+      "notes": "Default long-context NVIDIA coding profile; stronger context window, slightly slower than smaller models",
       "capabilities": {
         "toolCalls": true,
         "coding": true
@@ -66,10 +66,10 @@
     },
     "claude-3-5-sonnet-qwen": {
       "backend": "nvidia",
-      "providerModel": "qwen/qwen3.5-122b-a10b",
+      "providerModel": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
       "retryAttempts": 3,
       "retryBaseDelayMs": 500,
-      "notes": "Qwen fallback NVIDIA coding profile; useful as a backup, but less consistent on complex code",
+      "notes": "Nano fallback NVIDIA coding profile; useful as a backup, but lighter than the default",
       "capabilities": {
         "toolCalls": true,
         "coding": true
@@ -90,7 +90,7 @@
   "modelMap": {
     "legacy-claude-3-5-sonnet-latest": {
       "backend": "nvidia",
-      "model": "z-ai/glm-5.1"
+      "model": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
     }
   }
 }
diff --git a/scripts/claudia-claude.mjs b/scripts/claudia-claude.mjs
index 34eac95..feedbf0 100755
--- a/scripts/claudia-claude.mjs
+++ b/scripts/claudia-claude.mjs
@@ -91,9 +91,9 @@ Claudia Router Model Profiles
 
 Shortcuts (use with --model or in npm scripts):
 
-  --model fast     Default: z-ai/glm-5.1 (NVIDIA) — best long-context option, a bit slower
+  --model fast     Default: nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16 (NVIDIA) — best long-context option, a bit slower
   --model glm      Thinking-heavy: z-ai/glm4.7 (NVIDIA) — slower, but better on hard tasks
-  --model qwen     Fallback: qwen/qwen3.5-122b-a10b (NVIDIA) — useful fallback, less consistent
+  --model qwen     Fallback: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 (NVIDIA) — useful fallback, less consistent
   --model smoke    Lightweight: nvidia/nemotron-mini-4b-instruct (NVIDIA) — for quick checks only
 
   Built-in npm scripts:
diff --git a/scripts/presets.mjs b/scripts/presets.mjs
index b92f4ea..9a1ec19 100644
--- a/scripts/presets.mjs
+++ b/scripts/presets.mjs
@@ -6,7 +6,7 @@ export const PROFILE_PRESETS = {
     model: "claude-3-5-sonnet-latest",
     description: "Default long-context preset",
     nextCommand: "npm run claude:fast",
-    notes: "Default GLM-5.1 routing profile; strongest context window, but slower than smaller models"
+    notes: "Default NVIDIA Nemotron Super routing profile; strongest context window, but slower than smaller models"
   },
   glm: {
     alias: "glm",
@@ -20,7 +20,7 @@ export const PROFILE_PRESETS = {
     model: "claude-3-5-sonnet-qwen",
     description: "Fallback preset",
     nextCommand: "npm run claude:qwen",
-    notes: "Useful when GLM is unavailable, but less consistent on complex code"
+    notes: "Fallback NVIDIA Nemotron Nano routing profile; useful when the larger default is too heavy"
   },
   smoke: {
     alias: "smoke",
@@ -111,7 +111,7 @@ export function buildProfileModelProfiles(providerKey, provider) {
 
     modelProfiles[PROFILE_PRESETS.qwen.model] = {
       backend: providerKey,
-      providerModel: "qwen/qwen3.5-122b-a10b",
+      providerModel: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
       retryAttempts: 3,
       retryBaseDelayMs: 500,
       notes: PROFILE_PRESETS.qwen.notes
diff --git a/scripts/providers.mjs b/scripts/providers.mjs
index c55bd68..f4ba389 100644
--- a/scripts/providers.mjs
+++ b/scripts/providers.mjs
@@ -4,7 +4,7 @@ export const PROVIDERS = {
     name: "NVIDIA NIM",
     baseUrl: "https://integrate.api.nvidia.com/v1",
     apiKeyEnv: "NVIDIA_API_KEY",
-    defaultModel: "z-ai/glm-5.1",
+    defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
     smokeModel: "nvidia/nemotron-mini-4b-instruct",
     requiresKey: true,
     description: "Long-context and coding-capable models hosted by NVIDIA"
diff --git a/scripts/release-smoke.mjs b/scripts/release-smoke.mjs
index 4fdf5e0..9159656 100644
--- a/scripts/release-smoke.mjs
+++ b/scripts/release-smoke.mjs
@@ -144,6 +144,20 @@ function main() {
 
     const nvidiaConfig = readJson(configPath);
     assert(nvidiaConfig.defaultBackend === "nvidia", `Expected defaultBackend=nvidia, got ${nvidiaConfig.defaultBackend}`);
+    assert(
+      nvidiaConfig.backends?.nvidia?.defaultModel === "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
+      `Expected NVIDIA defaultModel to use the Nemotron Super model, got ${nvidiaConfig.backends?.nvidia?.defaultModel}`
+    );
+    assert(
+      nvidiaConfig.modelProfiles?.["claude-3-5-sonnet-latest"]?.providerModel ===
+        "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
+      "Expected fast profile to use the Nemotron Super model"
+    );
+    assert(
+      nvidiaConfig.modelProfiles?.["claude-3-5-sonnet-qwen"]?.providerModel ===
+        "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
+      "Expected qwen fallback profile to use the Nemotron Nano model"
+    );
     const nvidiaEnvFile = fs.readFileSync(envPath, "utf8");
     assert(nvidiaEnvFile.includes("NVIDIA_API_KEY=nvidia-test-key"), ".env did not persist NVIDIA_API_KEY");
 
diff --git a/tests/claudia-config.test.ts b/tests/claudia-config.test.ts
index 4ba8453..8ca746d 100644
--- a/tests/claudia-config.test.ts
+++ b/tests/claudia-config.test.ts
@@ -71,7 +71,13 @@ test("configuration wizard awaits remote connectivity before completion", async
   assert.match(logs.join("\n"), /OK Connected to nvidia successfully/);
   assert.match(logs.join("\n"), /Configuration complete!/);
   const config = JSON.parse(fs.readFileSync(path.join(cwd, "config.json"), "utf8"));
-  assert.equal(config.modelProfiles["claude-3-5-sonnet-latest"]?.providerModel, "z-ai/glm-5.1");
+  assert.equal(
+    config.modelProfiles["claude-3-5-sonnet-latest"]?.providerModel,
+    "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
+  );
   assert.equal(config.modelProfiles["claude-3-5-sonnet-glm"]?.providerModel, "z-ai/glm4.7");
-  assert.equal(config.modelProfiles["claude-3-5-sonnet-qwen"]?.providerModel, "qwen/qwen3.5-122b-a10b");
+  assert.equal(
+    config.modelProfiles["claude-3-5-sonnet-qwen"]?.providerModel,
+    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
+  );
 });
diff --git a/tests/profile.test.ts b/tests/profile.test.ts
index f21521a..e6af830 100644
--- a/tests/profile.test.ts
+++ b/tests/profile.test.ts
@@ -23,14 +23,14 @@ function writeNvidiaConfig(cwd: string): void {
           nvidia: {
             baseUrl: "https://integrate.api.nvidia.com/v1",
             apiKeyEnv: "NVIDIA_API_KEY",
-            defaultModel: "z-ai/glm-5.1"
+            defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
           }
         },
         modelMap: {},
         modelProfiles: {
           "claude-3-5-sonnet-latest": {
             backend: "nvidia",
-            providerModel: "z-ai/glm-5.1"
+            providerModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
           },
           "claude-3-5-sonnet-glm": {
             backend: "nvidia",
@@ -38,7 +38,7 @@ function writeNvidiaConfig(cwd: string): void {
           },
           "claude-3-5-sonnet-qwen": {
             backend: "nvidia",
-            providerModel: "qwen/qwen3.5-122b-a10b"
+            providerModel: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
           },
           "claude-3-haiku-latest": {
             backend: "nvidia",
diff --git a/tests/setup.test.ts b/tests/setup.test.ts
index 4a118be..8ee72f6 100644
--- a/tests/setup.test.ts
+++ b/tests/setup.test.ts
@@ -54,6 +54,17 @@ test("creates setup files, prompts for a missing key, and runs the NVIDIA smoke
   assert.equal(authorization, "Bearer secret-test-key");
   assert.equal(requestBody?.model, "nvidia/nemotron-mini-4b-instruct");
   assert.equal(requestBody?.stream, false);
+  const generatedConfig = JSON.parse(fs.readFileSync(path.join(cwd, "config.json"), "utf8")) as {
+    modelProfiles: Record<string, { providerModel?: string }>;
+  };
+  assert.equal(
+    generatedConfig.modelProfiles["claude-3-5-sonnet-latest"]?.providerModel,
+    "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
+  );
+  assert.equal(
+    generatedConfig.modelProfiles["claude-3-5-sonnet-qwen"]?.providerModel,
+    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16"
+  );
   assert.doesNotMatch(result.output, /secret-test-key/);
   assert.match(result.output, /Configuration complete!/);
 });
diff --git a/tests/status.test.ts b/tests/status.test.ts
index 432e6bc..d2b7909 100644
--- a/tests/status.test.ts
+++ b/tests/status.test.ts
@@ -22,7 +22,7 @@ function createStatusDirectory(env = "NVIDIA_API_KEY=test-key\nCLAUDIA_CLAUDE_MO
           nvidia: {
             baseUrl: "https://example.invalid/v1",
             apiKeyEnv: "NVIDIA_API_KEY",
-            defaultModel: "z-ai/glm-5.1"
+            defaultModel: "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16"
           }
         },
         modelMap: {},