NVIDIA · cv · Jun 3, 2026 · Jun 3, 2026
diff --git a/src/lib/actions/sandbox/snapshot.ts b/src/lib/actions/sandbox/snapshot.ts
@@ -216,6 +216,10 @@ async function autoCreateSandboxFromSource(
     // dst has its own lifecycle; don't inherit src's local NIM container
     // reference, or destroying dst would stop src's NIM.
     nimContainer: null,
+    // No CUDA proof has run for dst (this auto-create path passes no GPU flags),
+    // so clear src's proof rather than inheriting it — otherwise dst could show
+    // `Sandbox GPU: enabled (CUDA verified)` based on another sandbox's run (#4231).
+    sandboxGpuProof: null,
   });
 
   console.log(`  ${G}\u2713${R} Sandbox '${dstName}' created`);

diff --git a/src/lib/actions/sandbox/status-snapshot.ts b/src/lib/actions/sandbox/status-snapshot.ts
@@ -26,8 +26,8 @@ import {
 import { probeSandboxInferenceGatewayHealth } from "./process-recovery";
 import {
   getSandboxStatusPreflight,
-  withoutTerminalPhasePreflight,
   type SandboxStatusFailureLayer,
+  withoutTerminalPhasePreflight,
 } from "./status-preflight";
 
 type ProbeProviderHealth = (
@@ -84,6 +84,9 @@ export interface SandboxStatusReport {
   sandboxGpuEnabled: boolean;
   sandboxGpuMode: string | null;
   sandboxGpuDevice: string | null;
+  // Last recorded CUDA-usability proof so `status` can distinguish a configured
+  // GPU from a proven-usable one instead of reporting any GPU as healthy (#4231).
+  sandboxGpuProof: registry.SandboxGpuProofResult | null;
   openshellDriver: string;
   openshellVersion: string;
   policies: string[];
@@ -222,6 +225,7 @@ export async function getSandboxStatusReport(
     sandboxGpuEnabled,
     sandboxGpuMode: (sb && sb.sandboxGpuMode) || null,
     sandboxGpuDevice: (sb && sb.sandboxGpuDevice) || null,
+    sandboxGpuProof: (sb && sb.sandboxGpuProof) || null,
     openshellDriver: (sb && sb.openshellDriver) || "unknown",
     openshellVersion: (sb && sb.openshellVersion) || "unknown",
     policies,

diff --git a/src/lib/actions/sandbox/status.test.ts b/src/lib/actions/sandbox/status.test.ts
@@ -2,15 +2,16 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import { describe, expect, it } from "vitest";
-
-import type { ProviderHealthProbeOptions } from "../../../../dist/lib/inference/health";
 import {
   classifySandboxContainerFailureForStatus,
   classifySandboxStatusPreflightFailure,
   getSandboxStatusInferenceHealth,
   isDockerDaemonUnreachableForStatus,
   maybeGetSandboxStatusInferenceHealth,
+  sandboxGpuProofStatusSuffix,
+  sandboxGpuProofUnverified,
 } from "../../../../dist/lib/actions/sandbox/status";
+import type { ProviderHealthProbeOptions } from "../../../../dist/lib/inference/health";
 
 describe("sandbox status inference health", () => {
   it("passes the current model with the current provider", () => {
@@ -276,3 +277,38 @@ describe("classifySandboxStatusPreflightFailure", () => {
     expect(result).toBeNull();
   });
 });
+
+describe("sandbox GPU proof status rendering (#4231)", () => {
+  it("does not call an unproven GPU healthy", () => {
+    expect(sandboxGpuProofUnverified(null)).toBe(true);
+    expect(sandboxGpuProofUnverified(undefined)).toBe(true);
+    expect(
+      sandboxGpuProofUnverified({ status: "unverified", cudaVerified: false, at: "t" }),
+    ).toBe(true);
+    expect(
+      sandboxGpuProofUnverified({ status: "verified", cudaVerified: true, at: "t" }),
+    ).toBe(false);
+    expect(
+      sandboxGpuProofUnverified({ status: "failed", cudaVerified: false, at: "t" }),
+    ).toBe(false);
+  });
+
+  it("renders verified / unverified / failed suffixes distinctly", () => {
+    expect(
+      sandboxGpuProofStatusSuffix({ status: "verified", cudaVerified: true, at: "t" }),
+    ).toContain("CUDA verified");
+    // No recorded proof (older entries) must not read as healthy.
+    expect(sandboxGpuProofStatusSuffix(null)).toContain("CUDA unverified");
+    expect(
+      sandboxGpuProofStatusSuffix({ status: "unverified", cudaVerified: false, at: "t" }),
+    ).toContain("CUDA unverified");
+    const failed = sandboxGpuProofStatusSuffix({
+      status: "failed",
+      cudaVerified: false,
+      label: "cuInit(0)",
+      at: "t",
+    });
+    expect(failed).toContain("last CUDA proof failed");
+    expect(failed).toContain("cuInit(0)");
+  });
+});
diff --git a/src/lib/actions/sandbox/status.ts b/src/lib/actions/sandbox/status.ts
@@ -12,6 +12,7 @@ import * as nim from "../../inference/nim";
 import * as sandboxVersion from "../../sandbox/version";
 import * as shields from "../../shields";
 import { isTerminalSandboxPhase, parseSandboxPhase } from "../../state/gateway";
+import type { SandboxGpuProofResult } from "../../state/registry";
 import * as registry from "../../state/registry";
 import {
   createSystemDeps as createSessionDeps,
@@ -28,26 +29,26 @@ import {
   printWrongGatewayActiveGuidance,
 } from "./gateway-state";
 import { isSandboxGatewayRunningForStatus } from "./process-recovery";
-import { collectSandboxStatusSnapshot } from "./status-snapshot";
 import {
   getSandboxStatusPreflight,
   printGatewayFailureLayerHeader,
   printSandboxStatusPreflightHeader,
   withoutTerminalPhasePreflight,
 } from "./status-preflight";
+import { collectSandboxStatusSnapshot } from "./status-snapshot";
 
 export {
+  type ClassifySandboxStatusPreflightFailureDeps,
   classifySandboxContainerFailureForStatus,
   classifySandboxStatusPreflightFailure,
-  isDockerDaemonUnreachableForStatus,
   getSandboxStatusPreflight,
+  isDockerDaemonUnreachableForStatus,
   printGatewayFailureLayerHeader,
   printSandboxStatusPreflightHeader,
-  withoutTerminalPhasePreflight,
-  type ClassifySandboxStatusPreflightFailureDeps,
   type SandboxStatusFailureLayer,
   type SandboxStatusPreflightFailure,
   type SandboxStatusPreflightResult,
+  withoutTerminalPhasePreflight,
 } from "./status-preflight";
 export {
   collectSandboxStatusSnapshot,
@@ -68,6 +69,29 @@ function shouldProbeSandboxRuntimeVersion(
   return lookup.state === "present" && Boolean(sandbox.agentVersion);
 }
 
+// True when no proof is recorded (null/undefined, e.g. older entries) or the
+// recorded status is "unverified" (the CUDA proof could not execute). A "failed"
+// proof returns false; GPU enablement is checked by the caller, not here (#4231).
+export function sandboxGpuProofUnverified(
+  proof: SandboxGpuProofResult | null | undefined,
+): boolean {
+  return !proof || proof.status === "unverified";
+}
+
+// Render the proof-state suffix appended to the `Sandbox GPU: enabled` line:
+// "(CUDA verified)", "(last CUDA proof failed[: label])", or, for a missing
+// proof or any other status, "(CUDA unverified)" — never implied healthy (#4231).
+export function sandboxGpuProofStatusSuffix(
+  proof: SandboxGpuProofResult | null | undefined,
+): string {
+  if (proof?.status === "verified") return ` ${G}(CUDA verified)${R}`;
+  if (proof?.status === "failed") {
+    const label = proof.label ? `: ${proof.label}` : "";
+    return ` ${RD}(last CUDA proof failed${label})${R}`;
+  }
+  return ` ${YW}(CUDA unverified)${R}`;
+}
+
 /**
  * Render one Inference status line. The main probe and each subprobe go
  * through this helper so multi-hop providers (e.g. ollama-local backend +
@@ -183,10 +207,26 @@ export async function showSandboxStatus(sandboxName: string): Promise<void> {
     const sandboxGpu = sandboxGpuEnabled ? "enabled" : "disabled";
     const sandboxGpuMode = sb.sandboxGpuMode ? ` (${sb.sandboxGpuMode})` : "";
     const sandboxGpuDevice = sb.sandboxGpuDevice ? ` device=${sb.sandboxGpuDevice}` : "";
+    const sandboxGpuProofSuffix = sandboxGpuEnabled
+      ? sandboxGpuProofStatusSuffix(sb.sandboxGpuProof)
+      : "";
     const openshellDriver = sb.openshellDriver || "unknown";
     const openshellVersion = sb.openshellVersion || "unknown";
     console.log(`    Host GPU: ${hostGpu}`);
-    console.log(`    Sandbox GPU: ${sandboxGpu}${sandboxGpuMode}${sandboxGpuDevice}`);
+    console.log(
+      `    Sandbox GPU: ${sandboxGpu}${sandboxGpuMode}${sandboxGpuDevice}${sandboxGpuProofSuffix}`,
+    );
+    if (sandboxGpuEnabled && sb.sandboxGpuProof?.status === "failed") {
+      const detail = sb.sandboxGpuProof.detail;
+      if (detail) console.log(`      ${detail}`);
+      console.log(
+        "      CUDA failed a live proof. Recreate with corrected GPU device/group access, or rerun onboard with --no-gpu.",
+      );
+    } else if (sandboxGpuEnabled && sandboxGpuProofUnverified(sb.sandboxGpuProof)) {
+      console.log(
+        "      CUDA usability has not been proven. Rerun onboard to verify, or use --no-gpu for CPU.",
+      );
+    }
     console.log(`    OpenShell: ${openshellVersion} (${openshellDriver})`);
     console.log(`    Policies: ${(sb.policies || []).join(", ") || "none"}`);
 

diff --git a/src/lib/inference/gpu-trust.ts b/src/lib/inference/gpu-trust.ts
@@ -25,6 +25,27 @@ export function isDenylistedNvidiaGpuName(name: string): boolean {
   return NVIDIA_GPU_NAME_DENYLIST_PATTERN.test(name);
 }
 
+// Result of a bounded Docker `--gpus` CUDA proof. `passed` is true only when a
+// real CUDA workload (not just nvidia-smi) succeeded — that is the signal that
+// distinguishes a genuine Windows-ARM N1X + WSL2 + Docker Desktop GPU (#4565)
+// from the Windows-on-ARM Snapdragon nvidia-smi shim (#3988/#4424), which has
+// no usable NVIDIA device and so cannot pass the workload.
+export interface DockerGpuProofResult {
+  passed: boolean; // true only when the CUDA workload itself succeeded
+  timedOut: boolean; // NOTE(review): presumably set when the bounded proof hit its time limit — confirm
+  exitCode: number | null; // NOTE(review): null presumably means no exit status was available — confirm
+  diagnostic: string; // tests use "" on success and the CUDA error text on failure
+}
+
+// Optional accept-path used by `detectGpu()` when an ARM64 Linux host reports a
+// denylisted `JMJWOA-Generic-*` placeholder. The prover receives the reported GPU
+// names and returns `null` when the host is not a proof candidate (not ARM64 WSL
+// Docker Desktop), preserving the #3988 fail-closed default; otherwise it returns
+// the bounded Docker GPU proof outcome so a passing GPU is trusted by proof, not name.
+export type Arm64WslDockerDesktopGpuProver = (
+  gpuNames: string[],
+) => DockerGpuProofResult | null;
+
 export function isPlausibleNvidiaGpuName(name: string): boolean {
   return !!name && !isDenylistedNvidiaGpuName(name) && NVIDIA_GPU_NAME_PATTERN.test(name);
 }

diff --git a/src/lib/inference/nim.test.ts b/src/lib/inference/nim.test.ts
@@ -453,6 +453,99 @@ describe("nim", () => {
       }
     });
 
+    // #4565: a real Windows-ARM N1X + WSL2 + Docker Desktop host reports the
+    // same `JMJWOA-Generic-*` placeholder as the Snapdragon shim, but it can
+    // pass a bounded Docker `--gpus` CUDA proof. When the injected prover
+    // confirms the proof, the denylisted name is accepted and the detection is
+    // tagged so the sandbox preflight reaches the Docker Desktop WSL branch.
+    it("accepts a denylisted ARM64 GPU when the bounded Docker GPU proof passes (#4565)", () => {
+      const runCapture = vi.fn((cmd: string | string[]) => {
+        if (!Array.isArray(cmd)) throw new Error("expected argv array");
+        if (cmd[0] === "nvidia-smi" && cmd.some((a: string) => a.includes("name,memory.total"))) {
+          return "JMJWOA-Generic-GPU, 65471, 65000\n";
+        }
+        return "";
+      });
+      const { nimModule, restore } = loadNimWithMockedRunner(runCapture);
+      const proveArm64WslDockerDesktopGpu = vi.fn(() => ({
+        passed: true,
+        timedOut: false,
+        exitCode: 0,
+        diagnostic: "",
+      }));
+
+      try {
+        withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
+          const result = nimModule.detectGpu({ proveArm64WslDockerDesktopGpu });
+          expect(result).toMatchObject({
+            type: "nvidia",
+            name: "JMJWOA-Generic-GPU",
+            count: 1,
+            totalMemoryMB: 65471,
+            wslDockerDesktopGpuProofPassed: true,
+          });
+          expect(proveArm64WslDockerDesktopGpu).toHaveBeenCalledWith(["JMJWOA-Generic-GPU"]);
+        });
+      } finally {
+        restore();
+      }
+    });
+
+    // Snapdragon WoA fail-closed: the same placeholder name, but the bounded
+    // CUDA proof fails because there is no usable NVIDIA device. The detection
+    // must stay null so #3988/#4424 is not reopened.
+    it("keeps rejecting a denylisted ARM64 GPU when the Docker GPU proof fails (#4565/#3988)", () => {
+      const runCapture = vi.fn((cmd: string | string[]) => {
+        if (!Array.isArray(cmd)) throw new Error("expected argv array");
+        if (cmd[0] === "nvidia-smi" && cmd.some((a: string) => a.includes("name,memory.total"))) {
+          return "JMJWOA-Generic-GPU, 65471, 65000\n";
+        }
+        return "";
+      });
+      const { nimModule, restore } = loadNimWithMockedRunner(runCapture);
+      const failingProver = vi.fn(() => ({
+        passed: false,
+        timedOut: false,
+        exitCode: 1,
+        diagnostic: "no CUDA-capable device is detected",
+      }));
+      const notCandidateProver = vi.fn(() => null);
+
+      try {
+        withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
+          expect(nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: failingProver })).toBeNull();
+          // A host that is not an ARM64 WSL Docker Desktop candidate returns
+          // null from the prover and must also fail closed (no proof attempted).
+          expect(
+            nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: notCandidateProver }),
+          ).toBeNull();
+        });
+      } finally {
+        restore();
+      }
+    });
+
+    // When no prover is wired (`proveArm64WslDockerDesktopGpu` passed as null),
+    // the denylist stays fail-closed exactly as before the #4565 accept-path existed.
+    it("rejects a denylisted ARM64 GPU when no Docker GPU prover is provided", () => {
+      const runCapture = vi.fn((cmd: string | string[]) => {
+        if (!Array.isArray(cmd)) throw new Error("expected argv array");
+        if (cmd[0] === "nvidia-smi" && cmd.some((a: string) => a.includes("name,memory.total"))) {
+          return "JMJWOA-Generic-GPU, 65471, 65000\n";
+        }
+        return "";
+      });
+      const { nimModule, restore } = loadNimWithMockedRunner(runCapture);
+
+      try {
+        withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
+          expect(nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: null })).toBeNull();
+        });
+      } finally {
+        restore();
+      }
+    });
+
     // Trust-tier gate: on ARM64 Linux with generic firmware, the absence of
     // `/proc/driver/nvidia/` is the Windows-on-ARM WSL shim profile and must
     // be rejected even when the nvidia-smi probe returns a plausible-looking