diff --git a/src/lib/actions/sandbox/snapshot.ts b/src/lib/actions/sandbox/snapshot.ts
index 8a12254087..0c94d8e54d 100644
--- a/src/lib/actions/sandbox/snapshot.ts
+++ b/src/lib/actions/sandbox/snapshot.ts
@@ -216,6 +216,10 @@ async function autoCreateSandboxFromSource(
     // dst has its own lifecycle; don't inherit src's local NIM container
     // reference, or destroying dst would stop src's NIM.
     nimContainer: null,
+    // No CUDA proof has run for dst (this auto-create path passes no GPU flags),
+    // so clear src's proof rather than inheriting it — otherwise dst could show
+    // `Sandbox GPU: enabled (CUDA verified)` based on another sandbox's run (#4231).
+    sandboxGpuProof: null,
   });
 
   console.log(`  ${G}\u2713${R} Sandbox '${dstName}' created`);
diff --git a/src/lib/actions/sandbox/status-snapshot.ts b/src/lib/actions/sandbox/status-snapshot.ts
index 6073fc5237..5ad989d37e 100644
--- a/src/lib/actions/sandbox/status-snapshot.ts
+++ b/src/lib/actions/sandbox/status-snapshot.ts
@@ -26,8 +26,8 @@ import {
 import { probeSandboxInferenceGatewayHealth } from "./process-recovery";
 import {
   getSandboxStatusPreflight,
-  withoutTerminalPhasePreflight,
   type SandboxStatusFailureLayer,
+  withoutTerminalPhasePreflight,
 } from "./status-preflight";
 
 type ProbeProviderHealth = (
@@ -84,6 +84,9 @@ export interface SandboxStatusReport {
   sandboxGpuEnabled: boolean;
   sandboxGpuMode: string | null;
   sandboxGpuDevice: string | null;
+  // Last recorded CUDA-usability proof so `status` can distinguish a configured
+  // GPU from a proven-usable one instead of reporting any GPU as healthy (#4231).
+  sandboxGpuProof: registry.SandboxGpuProofResult | null;
   openshellDriver: string;
   openshellVersion: string;
   policies: string[];
@@ -222,6 +225,7 @@ export async function getSandboxStatusReport(
     sandboxGpuEnabled,
     sandboxGpuMode: (sb && sb.sandboxGpuMode) || null,
     sandboxGpuDevice: (sb && sb.sandboxGpuDevice) || null,
+    sandboxGpuProof: (sb && sb.sandboxGpuProof) || null,
     openshellDriver: (sb && sb.openshellDriver) || "unknown",
     openshellVersion: (sb && sb.openshellVersion) || "unknown",
     policies,
diff --git a/src/lib/actions/sandbox/status.test.ts b/src/lib/actions/sandbox/status.test.ts
index 9722fc0399..cd5eba214c 100644
--- a/src/lib/actions/sandbox/status.test.ts
+++ b/src/lib/actions/sandbox/status.test.ts
@@ -2,15 +2,16 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import { describe, expect, it } from "vitest";
-
-import type { ProviderHealthProbeOptions } from "../../../../dist/lib/inference/health";
 import {
   classifySandboxContainerFailureForStatus,
   classifySandboxStatusPreflightFailure,
   getSandboxStatusInferenceHealth,
   isDockerDaemonUnreachableForStatus,
   maybeGetSandboxStatusInferenceHealth,
+  sandboxGpuProofStatusSuffix,
+  sandboxGpuProofUnverified,
 } from "../../../../dist/lib/actions/sandbox/status";
+import type { ProviderHealthProbeOptions } from "../../../../dist/lib/inference/health";
 
 describe("sandbox status inference health", () => {
   it("passes the current model with the current provider", () => {
@@ -276,3 +277,38 @@ describe("classifySandboxStatusPreflightFailure", () => {
     expect(result).toBeNull();
   });
 });
+
+describe("sandbox GPU proof status rendering (#4231)", () => {
+  it("does not call an unproven GPU healthy", () => {
+    expect(sandboxGpuProofUnverified(null)).toBe(true);
+    expect(sandboxGpuProofUnverified(undefined)).toBe(true);
+    expect(
+      sandboxGpuProofUnverified({ status: "unverified", cudaVerified: false, at: "t" }),
+    ).toBe(true);
+    expect(
+      sandboxGpuProofUnverified({ status: "verified", cudaVerified: true, at: "t" }),
+    ).toBe(false);
+    expect(
+      sandboxGpuProofUnverified({ status: "failed", cudaVerified: false, at: "t" }),
+    ).toBe(false);
+  });
+
+  it("renders verified / unverified / failed suffixes distinctly", () => {
+    expect(
+      sandboxGpuProofStatusSuffix({ status: "verified", cudaVerified: true, at: "t" }),
+    ).toContain("CUDA verified");
+    // No recorded proof (older entries) must not read as healthy.
+    expect(sandboxGpuProofStatusSuffix(null)).toContain("CUDA unverified");
+    expect(
+      sandboxGpuProofStatusSuffix({ status: "unverified", cudaVerified: false, at: "t" }),
+    ).toContain("CUDA unverified");
+    const failed = sandboxGpuProofStatusSuffix({
+      status: "failed",
+      cudaVerified: false,
+      label: "cuInit(0)",
+      at: "t",
+    });
+    expect(failed).toContain("last CUDA proof failed");
+    expect(failed).toContain("cuInit(0)");
+  });
+});
diff --git a/src/lib/actions/sandbox/status.ts b/src/lib/actions/sandbox/status.ts
index 99667784ed..068bde5c36 100644
--- a/src/lib/actions/sandbox/status.ts
+++ b/src/lib/actions/sandbox/status.ts
@@ -12,6 +12,7 @@ import * as nim from "../../inference/nim";
 import * as sandboxVersion from "../../sandbox/version";
 import * as shields from "../../shields";
 import { isTerminalSandboxPhase, parseSandboxPhase } from "../../state/gateway";
+import type { SandboxGpuProofResult } from "../../state/registry";
 import * as registry from "../../state/registry";
 import {
   createSystemDeps as createSessionDeps,
@@ -28,26 +29,26 @@ import {
   printWrongGatewayActiveGuidance,
 } from "./gateway-state";
 import { isSandboxGatewayRunningForStatus } from "./process-recovery";
-import { collectSandboxStatusSnapshot } from "./status-snapshot";
 import {
   getSandboxStatusPreflight,
   printGatewayFailureLayerHeader,
   printSandboxStatusPreflightHeader,
   withoutTerminalPhasePreflight,
 } from "./status-preflight";
+import { collectSandboxStatusSnapshot } from "./status-snapshot";
 
 export {
+  type ClassifySandboxStatusPreflightFailureDeps,
   classifySandboxContainerFailureForStatus,
   classifySandboxStatusPreflightFailure,
-  isDockerDaemonUnreachableForStatus,
   getSandboxStatusPreflight,
+  isDockerDaemonUnreachableForStatus,
   printGatewayFailureLayerHeader,
   printSandboxStatusPreflightHeader,
-  withoutTerminalPhasePreflight,
-  type ClassifySandboxStatusPreflightFailureDeps,
   type SandboxStatusFailureLayer,
   type SandboxStatusPreflightFailure,
   type SandboxStatusPreflightResult,
+  withoutTerminalPhasePreflight,
 } from "./status-preflight";
 export {
   collectSandboxStatusSnapshot,
@@ -68,6 +69,29 @@ function shouldProbeSandboxRuntimeVersion(
   return lookup.state === "present" && Boolean(sandbox.agentVersion);
 }
 
+// Reports whether `proof` leaves CUDA usability unproven: either no proof is
+// recorded or its status is "unverified"; callers check GPU enablement (#4231).
+export function sandboxGpuProofUnverified(
+  proof: SandboxGpuProofResult | null | undefined,
+): boolean {
+  if (!proof) return true;
+  return proof.status === "unverified";
+}
+
+// Build the suffix appended to the `Sandbox GPU: enabled` status line from the
+// recorded CUDA proof: "verified" and "failed" (plus the failing label when one
+// is present) get their own text; anything else, including a missing proof,
+// renders as unverified so a merely configured GPU never reads healthy (#4231).
+export function sandboxGpuProofStatusSuffix(
+  proof: SandboxGpuProofResult | null | undefined,
+): string {
+  const status = proof?.status;
+  if (status === "verified") return ` ${G}(CUDA verified)${R}`;
+  if (status !== "failed") return ` ${YW}(CUDA unverified)${R}`;
+  const reason = proof?.label ? `: ${proof.label}` : "";
+  return ` ${RD}(last CUDA proof failed${reason})${R}`;
+}
+
 /**
  * Render one Inference status line. The main probe and each subprobe go
  * through this helper so multi-hop providers (e.g. ollama-local backend +
@@ -183,10 +207,26 @@ export async function showSandboxStatus(sandboxName: string): Promise<void> {
     const sandboxGpu = sandboxGpuEnabled ? "enabled" : "disabled";
     const sandboxGpuMode = sb.sandboxGpuMode ? ` (${sb.sandboxGpuMode})` : "";
     const sandboxGpuDevice = sb.sandboxGpuDevice ? ` device=${sb.sandboxGpuDevice}` : "";
+    const sandboxGpuProofSuffix = sandboxGpuEnabled
+      ? sandboxGpuProofStatusSuffix(sb.sandboxGpuProof)
+      : "";
     const openshellDriver = sb.openshellDriver || "unknown";
     const openshellVersion = sb.openshellVersion || "unknown";
     console.log(`    Host GPU: ${hostGpu}`);
-    console.log(`    Sandbox GPU: ${sandboxGpu}${sandboxGpuMode}${sandboxGpuDevice}`);
+    console.log(
+      `    Sandbox GPU: ${sandboxGpu}${sandboxGpuMode}${sandboxGpuDevice}${sandboxGpuProofSuffix}`,
+    );
+    if (sandboxGpuEnabled && sb.sandboxGpuProof?.status === "failed") {
+      const detail = sb.sandboxGpuProof.detail;
+      if (detail) console.log(`      ${detail}`);
+      console.log(
+        "      CUDA failed a live proof. Recreate with corrected GPU device/group access, or rerun onboard with --no-gpu.",
+      );
+    } else if (sandboxGpuEnabled && sandboxGpuProofUnverified(sb.sandboxGpuProof)) {
+      console.log(
+        "      CUDA usability has not been proven. Rerun onboard to verify, or use --no-gpu for CPU.",
+      );
+    }
     console.log(`    OpenShell: ${openshellVersion} (${openshellDriver})`);
     console.log(`    Policies: ${(sb.policies || []).join(", ") || "none"}`);
 
diff --git a/src/lib/inference/gpu-trust.ts b/src/lib/inference/gpu-trust.ts
index 7730e88a92..585282363f 100644
--- a/src/lib/inference/gpu-trust.ts
+++ b/src/lib/inference/gpu-trust.ts
@@ -25,6 +25,27 @@ export function isDenylistedNvidiaGpuName(name: string): boolean {
   return NVIDIA_GPU_NAME_DENYLIST_PATTERN.test(name);
 }
 
+// Result of a bounded Docker `--gpus` CUDA proof. `passed` is true only when a
+// real CUDA workload (not just nvidia-smi) succeeded — that is the signal that
+// distinguishes a genuine Windows-ARM N1X + WSL2 + Docker Desktop GPU (#4565)
+// from the Windows-on-ARM Snapdragon nvidia-smi shim (#3988/#4424), which has
+// no usable NVIDIA device and so cannot pass the workload.
+export interface DockerGpuProofResult {
+  passed: boolean;
+  timedOut: boolean;
+  exitCode: number | null;
+  diagnostic: string;
+}
+
+// Optional accept-path used by `detectGpu()` when an ARM64 Linux host reports a
+// denylisted `JMJWOA-Generic-*` placeholder. The prover returns `null` when the
+// host is not a proof candidate (not ARM64 WSL Docker Desktop), preserving the
+// #3988 fail-closed default; otherwise it returns the bounded Docker GPU proof
+// outcome so a passing real GPU can be trusted without trusting the name alone.
+export type Arm64WslDockerDesktopGpuProver = (
+  gpuNames: string[],
+) => DockerGpuProofResult | null;
+
 export function isPlausibleNvidiaGpuName(name: string): boolean {
   return !!name && !isDenylistedNvidiaGpuName(name) && NVIDIA_GPU_NAME_PATTERN.test(name);
 }
diff --git a/src/lib/inference/nim.test.ts b/src/lib/inference/nim.test.ts
index 97fe476940..0816ebb964 100644
--- a/src/lib/inference/nim.test.ts
+++ b/src/lib/inference/nim.test.ts
@@ -453,6 +453,99 @@ describe("nim", () => {
       }
     });
 
+    // #4565: a real Windows-ARM N1X + WSL2 + Docker Desktop host reports the
+    // same `JMJWOA-Generic-*` placeholder as the Snapdragon shim, but it can
+    // pass a bounded Docker `--gpus` CUDA proof. When the injected prover
+    // confirms the proof, the denylisted name is accepted and the detection is
+    // tagged so the sandbox preflight reaches the Docker Desktop WSL branch.
+    it("accepts a denylisted ARM64 GPU when the bounded Docker GPU proof passes (#4565)", () => {
+      const runCapture = vi.fn((cmd: string | string[]) => {
+        if (!Array.isArray(cmd)) throw new Error("expected argv array");
+        if (cmd[0] === "nvidia-smi" && cmd.some((a: string) => a.includes("name,memory.total"))) {
+          return "JMJWOA-Generic-GPU, 65471, 65000\n";
+        }
+        return "";
+      });
+      const { nimModule, restore } = loadNimWithMockedRunner(runCapture);
+      const proveArm64WslDockerDesktopGpu = vi.fn(() => ({
+        passed: true,
+        timedOut: false,
+        exitCode: 0,
+        diagnostic: "",
+      }));
+
+      try {
+        withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
+          const result = nimModule.detectGpu({ proveArm64WslDockerDesktopGpu });
+          expect(result).toMatchObject({
+            type: "nvidia",
+            name: "JMJWOA-Generic-GPU",
+            count: 1,
+            totalMemoryMB: 65471,
+            wslDockerDesktopGpuProofPassed: true,
+          });
+          expect(proveArm64WslDockerDesktopGpu).toHaveBeenCalledWith(["JMJWOA-Generic-GPU"]);
+        });
+      } finally {
+        restore();
+      }
+    });
+
+    // Snapdragon WoA fail-closed: the same placeholder name, but the bounded
+    // CUDA proof fails because there is no usable NVIDIA device. The detection
+    // must stay null so #3988/#4424 is not reopened.
+    it("keeps rejecting a denylisted ARM64 GPU when the Docker GPU proof fails (#4565/#3988)", () => {
+      const runCapture = vi.fn((cmd: string | string[]) => {
+        if (!Array.isArray(cmd)) throw new Error("expected argv array");
+        if (cmd[0] === "nvidia-smi" && cmd.some((a: string) => a.includes("name,memory.total"))) {
+          return "JMJWOA-Generic-GPU, 65471, 65000\n";
+        }
+        return "";
+      });
+      const { nimModule, restore } = loadNimWithMockedRunner(runCapture);
+      const failingProver = vi.fn(() => ({
+        passed: false,
+        timedOut: false,
+        exitCode: 1,
+        diagnostic: "no CUDA-capable device is detected",
+      }));
+      const notCandidateProver = vi.fn(() => null);
+
+      try {
+        withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
+          expect(nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: failingProver })).toBeNull();
+          // A host that is not an ARM64 WSL Docker Desktop candidate returns
+          // null from the prover and must also fail closed (no proof attempted).
+          expect(
+            nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: notCandidateProver }),
+          ).toBeNull();
+        });
+      } finally {
+        restore();
+      }
+    });
+
+    // When the prover dependency is explicitly null (no prover wired), the denylist
+    // stays fail-closed exactly as before the #4565 accept-path existed.
+    it("rejects a denylisted ARM64 GPU when no Docker GPU prover is provided", () => {
+      const runCapture = vi.fn((cmd: string | string[]) => {
+        if (!Array.isArray(cmd)) throw new Error("expected argv array");
+        if (cmd[0] === "nvidia-smi" && cmd.some((a: string) => a.includes("name,memory.total"))) {
+          return "JMJWOA-Generic-GPU, 65471, 65000\n";
+        }
+        return "";
+      });
+      const { nimModule, restore } = loadNimWithMockedRunner(runCapture);
+
+      try {
+        withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
+          expect(nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: null })).toBeNull();
+        });
+      } finally {
+        restore();
+      }
+    });
+
     // Trust-tier gate: on ARM64 Linux with generic firmware, the absence of
     // `/proc/driver/nvidia/` is the Windows-on-ARM WSL shim profile and must
     // be rejected even when the nvidia-smi probe returns a plausible-looking
diff --git a/src/lib/inference/nim.ts b/src/lib/inference/nim.ts
index 49aa289200..c71ba90efb 100644
--- a/src/lib/inference/nim.ts
+++ b/src/lib/inference/nim.ts
@@ -21,6 +21,7 @@ const nimImages = require("../../../bin/lib/nim-images.json");
 
 import { VLLM_PORT } from "../core/ports";
 import {
+  type Arm64WslDockerDesktopGpuProver,
   isDenylistedNvidiaGpuName,
   isPlausibleNvidiaGpuName,
   nvidiaHostLooksGenuine,
@@ -70,6 +71,44 @@ export interface GpuDetection {
   unifiedMemory?: boolean;
   spark?: boolean;
   platform?: NvidiaPlatform;
+  // Set when a denylisted `JMJWOA-Generic-*` placeholder name was accepted only
+  // because a bounded Docker `--gpus` CUDA proof passed (Windows-ARM N1X + WSL2
+  // + Docker Desktop, #4565). Diagnostic marker that this detection cleared a
+  // live proof rather than firmware/name trust. The sandbox GPU preflight still
+  // reaches the Docker Desktop WSL compatibility branch via its own
+  // `detectWslDockerDesktopStatus()` check (consistent because the proof itself
+  // requires Docker Desktop WSL); this flag does not gate that branch.
+  wslDockerDesktopGpuProofPassed?: boolean;
+}
+
+export interface DetectGpuDeps {
+  // Optional accept-path for ARM64 WSL Docker Desktop `JMJWOA-Generic-*` GPUs
+  // (#4565). Injected in tests; in production `detectGpu()` lazily builds the
+  // default prover from the onboard WSL Docker Desktop module only when it is
+  // about to reject a denylisted ARM64 name.
+  proveArm64WslDockerDesktopGpu?: Arm64WslDockerDesktopGpuProver | null;
+}
+
+// Lazily construct the default ARM64 WSL Docker Desktop GPU prover. Kept lazy
+// (and behind a require) so the inference layer does not statically depend on
+// the onboard layer, and so the bounded Docker proof is only wired when we
+// actually reach the denylist-reject path on an ARM64 host.
+function defaultArm64WslDockerDesktopGpuProver(): Arm64WslDockerDesktopGpuProver | null {
+  try {
+    return require("../onboard/wsl-docker-desktop-gpu").createArm64WslDockerDesktopGpuProver();
+  } catch (error) {
+    // Degrade to "no prover" only when the optional prover module itself cannot
+    // be resolved (first message line names it). MODULE_NOT_FOUND raised for one
+    // of its own requires is a real bug and must bubble up, not hide the GPU.
+    if (
+      typeof error === "object" &&
+      (error as NodeJS.ErrnoException | null)?.code === "MODULE_NOT_FOUND" &&
+      /^[^\n]*wsl-docker-desktop-gpu/.test(String((error as Error).message))
+    ) {
+      return null;
+    }
+    throw error;
+  }
 }
 
 // Group GPUs by their nvidia-smi model name, preserving first-appearance order.
@@ -314,7 +353,7 @@ export function canRunNimWithMemory(totalMemoryMB: number): boolean {
   return nimImages.models.some((m: NimModel) => m.minGpuMemoryMB <= totalMemoryMB);
 }
 
-export function detectGpu(): GpuDetection | null {
+export function detectGpu(deps: DetectGpuDeps = {}): GpuDetection | null {
   // Try NVIDIA first — query name, total, and free VRAM in a single call so
   // the preflight line can show the GPU model alongside the memory size and
   // the bootstrap-model selector can pick a model that fits currently
@@ -356,20 +395,43 @@ export function detectGpu(): GpuDetection | null {
         // Off Spark/Station/Jetson firmware, layer a denylist check and the
         // trust-tier gate before trusting the nvidia-smi probe. The observed
         // Windows-on-ARM WSL2 nvidia-smi shim emits a `JMJWOA-Generic-*`
-        // placeholder name AND ships no `/proc/driver/nvidia/` directory, so
-        // either signal alone is sufficient to reject. Treat any denylisted
-        // row as a poisoned probe and reject the whole result — partial
-        // filtering would let a mixed-row spoof surface a non-placeholder
-        // row as a real GPU.
+        // placeholder name AND ships no `/proc/driver/nvidia/` directory. A
+        // denylisted row still fails closed by default; the only escape is a
+        // bounded Docker `--gpus` CUDA proof (#4565), which the Snapdragon shim
+        // cannot pass. Without that proof, any denylisted row rejects the whole
+        // probe — partial filtering would let a mixed-row spoof surface a
+        // non-placeholder row as a real GPU.
         const firmwareConfirmsNvidia =
           platform === "spark" || platform === "station" || platform === "jetson";
         let trusted: ParsedGpu[];
+        let wslDockerDesktopGpuProofPassed = false;
         if (firmwareConfirmsNvidia) {
           trusted = parsed;
-        } else {
-          if (parsed.some((p: ParsedGpu) => isDenylistedNvidiaGpuName(p.name))) {
+        } else if (parsed.some((p: ParsedGpu) => isDenylistedNvidiaGpuName(p.name))) {
+          // A denylisted `JMJWOA-Generic-*` placeholder. Both real Windows-ARM
+          // N1X (WSL2 + Docker Desktop) and the Snapdragon nvidia-smi shim emit
+          // this name, so the name and `/proc/driver/nvidia` are insufficient.
+          // Give the host one bounded Docker `--gpus` CUDA proof: only the real
+          // GPU can run the workload, so a pass safely accepts N1X while the
+          // shim keeps failing closed (#4565 without reopening #3988/#4424).
+          const prover =
+            deps.proveArm64WslDockerDesktopGpu === undefined
+              ? defaultArm64WslDockerDesktopGpuProver()
+              : deps.proveArm64WslDockerDesktopGpu;
+          const proof = prover ? prover(parsed.map((p: ParsedGpu) => p.name)) : null;
+          if (!proof || !proof.passed) {
             return null;
           }
+          // The proof confirms a usable GPU, but it does not vouch for every
+          // row. Keep only the placeholder rows it covers plus any plausibly-
+          // named NVIDIA rows; drop unrecognized garbage so a mixed-row spoof
+          // cannot inflate totalMemoryMB with a phantom device.
+          trusted = parsed.filter(
+            (p: ParsedGpu) =>
+              isDenylistedNvidiaGpuName(p.name) || isPlausibleNvidiaGpuName(p.name),
+          );
+          wslDockerDesktopGpuProofPassed = true;
+        } else {
           if (!nvidiaHostLooksGenuine()) {
             return null;
           }
@@ -402,6 +464,7 @@ export function detectGpu(): GpuDetection | null {
           nimCapable: canRunNimWithMemory(totalMemoryMB),
           platform,
           spark: platform === "spark",
+          ...(wslDockerDesktopGpuProofPassed ? { wslDockerDesktopGpuProofPassed: true } : {}),
         };
       }
     }
diff --git a/src/lib/onboard/docker-gpu-local-inference.test.ts b/src/lib/onboard/docker-gpu-local-inference.test.ts
index 5b9a4b77b0..58e2d90efd 100644
--- a/src/lib/onboard/docker-gpu-local-inference.test.ts
+++ b/src/lib/onboard/docker-gpu-local-inference.test.ts
@@ -307,6 +307,28 @@ describe("verifyGpuSandboxAfterReady", () => {
     expect(verifyDirectSandboxGpu).toHaveBeenCalledWith("alpha");
   });
 
+  it("captures the CUDA-usability proof onto the config for status persistence (#4231)", () => {
+    const proof = { status: "verified" as const, cudaVerified: true, at: "t" };
+    // Fresh config so the assignment does not leak into the shared GPU_CONFIG.
+    const config: { sandboxGpuEnabled: boolean; sandboxGpuProof?: typeof proof | null } = {
+      sandboxGpuEnabled: true,
+    };
+    verifyGpuSandboxAfterReady(
+      config,
+      "vllm-local",
+      baseOptions({
+        verifyDirectSandboxGpu: vi.fn(() => proof),
+        deps: {
+          findContainerIds: () => ["container-abc"],
+          dockerCapture: vi.fn(() => inspectWithNetworkMode("host")),
+          dockerRun: dockerRunWithCurl({ status: 0 }),
+          sleep: vi.fn(),
+        },
+      }),
+    );
+    expect(config.sandboxGpuProof).toEqual(proof);
+  });
+
   it("does not duplicate proof diagnostics when Docker GPU patch verifier handles them", () => {
     const proofError = new Error("process.exit");
     const verifyGpuOrExit = vi.fn(() => {
diff --git a/src/lib/onboard/docker-gpu-local-inference.ts b/src/lib/onboard/docker-gpu-local-inference.ts
index 933e9e730f..c61aa77458 100644
--- a/src/lib/onboard/docker-gpu-local-inference.ts
+++ b/src/lib/onboard/docker-gpu-local-inference.ts
@@ -8,6 +8,7 @@ import {
   getLocalProviderValidationBaseUrl,
   LOCAL_INFERENCE_SANDBOX_HOST_URL_ENV,
 } from "../inference/local";
+import type { SandboxGpuProofResult } from "../state/registry";
 import {
   DOCKER_GPU_PATCH_NETWORK_ENV,
   type DockerGpuPatchMode,
@@ -31,6 +32,10 @@ const DOCKER_GPU_INFERENCE_PROBE_RETRY_DELAY_SECS = 2;
 type DockerGpuLocalInferenceConfig = {
   sandboxGpuEnabled: boolean;
   sandboxGpuDevice?: string | null;
+  // Written back by `verifyGpuSandboxAfterReady` with the CUDA-usability proof
+  // result so the registry/`status` can distinguish a configured GPU from a
+  // proven-usable one (#4231).
+  sandboxGpuProof?: SandboxGpuProofResult | null;
 };
 
 type DockerGpuLocalInferenceOptions = {
@@ -369,8 +374,10 @@ export type GpuSandboxAfterReadyOptions = {
   sandboxName: string;
   dockerDriverGateway: boolean;
   useDockerGpuPatch: boolean;
-  verifyDirectSandboxGpu: (sandboxName: string) => void;
-  verifyGpuOrExit?: (verifyDirectSandboxGpu: (sandboxName: string) => void) => void;
+  verifyDirectSandboxGpu: (sandboxName: string) => SandboxGpuProofResult;
+  verifyGpuOrExit?: (
+    verifyDirectSandboxGpu: (sandboxName: string) => SandboxGpuProofResult,
+  ) => SandboxGpuProofResult;
   selectedMode: () => DockerGpuPatchMode | null;
   runCaptureOpenshell: (args: string[], opts?: Record<string, unknown>) => string;
   env?: NodeJS.ProcessEnv;
@@ -393,11 +400,12 @@ export function verifyGpuSandboxAfterReady(
   options: GpuSandboxAfterReadyOptions,
 ): void {
   try {
-    if (options.verifyGpuOrExit) {
-      options.verifyGpuOrExit(options.verifyDirectSandboxGpu);
-    } else {
-      options.verifyDirectSandboxGpu(options.sandboxName);
-    }
+    // Capture the CUDA-usability proof result and write it back onto the shared
+    // config so onboarding can persist it to the registry and `status` can
+    // report proven usability rather than mere configuration (#4231).
+    config.sandboxGpuProof = options.verifyGpuOrExit
+      ? options.verifyGpuOrExit(options.verifyDirectSandboxGpu)
+      : options.verifyDirectSandboxGpu(options.sandboxName);
   } catch (error) {
     // `verifyGpuOrExit` is supplied by the Docker GPU create patch and already
     // prints the richer Error-phase / patched-container diagnostics before
diff --git a/src/lib/onboard/docker-gpu-sandbox-create.ts b/src/lib/onboard/docker-gpu-sandbox-create.ts
index 00e7dd3fbd..2c22f46608 100644
--- a/src/lib/onboard/docker-gpu-sandbox-create.ts
+++ b/src/lib/onboard/docker-gpu-sandbox-create.ts
@@ -1,6 +1,8 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 
+import { getSandboxFailurePhase } from "../state/gateway";
+import type { SandboxGpuProofResult } from "../state/registry";
 import type {
   DockerGpuPatchBackend,
   DockerGpuPatchDeps,
@@ -19,7 +21,6 @@ import {
   shouldApplyDockerGpuPatch,
   waitForOpenShellSupervisorReconnect,
 } from "./docker-gpu-patch";
-import { getSandboxFailurePhase } from "../state/gateway";
 
 type DockerGpuSandboxCreateDeps = Pick<
   DockerGpuPatchDeps,
@@ -64,9 +65,12 @@ export type DockerGpuSandboxCreatePatch = {
    * Run the GPU proof while distinguishing "sandbox in terminal phase" from
    * "proof failed inside a live sandbox". Calls `process.exit(1)` for the
    * former and rethrows after printing diagnostics for the latter so the
-   * onboarding flow surfaces the right failure cause (#4316).
+   * onboarding flow surfaces the right failure cause (#4316). Returns the
+   * CUDA-usability proof result on success so callers can persist it (#4231).
    */
-  verifyGpuOrExit: (verifyDirectSandboxGpu: (sandboxName: string) => void) => void;
+  verifyGpuOrExit: (
+    verifyDirectSandboxGpu: (sandboxName: string) => SandboxGpuProofResult,
+  ) => SandboxGpuProofResult;
 };
 
 export function createDockerGpuSandboxCreatePatch(
@@ -209,7 +213,7 @@ export function createDockerGpuSandboxCreatePatch(
         }
       }
       try {
-        verifyDirectSandboxGpu(sandboxName);
+        return verifyDirectSandboxGpu(sandboxName);
       } catch (error) {
         printDockerGpuProofFailure(sandboxName, error, result?.mode ?? null, {
           runCaptureOpenshell: options.deps.runCaptureOpenshell,
diff --git a/src/lib/onboard/sandbox-gpu-mode.ts b/src/lib/onboard/sandbox-gpu-mode.ts
index dc6034e301..fe7ef7433b 100644
--- a/src/lib/onboard/sandbox-gpu-mode.ts
+++ b/src/lib/onboard/sandbox-gpu-mode.ts
@@ -2,6 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import type { GpuDetection } from "../inference/nim";
+import type { SandboxGpuProofResult } from "../state/registry";
 
 export type SandboxGpuMode = "auto" | "1" | "0";
 export type SandboxGpuFlag = "enable" | "disable" | null;
@@ -13,6 +14,10 @@ export type SandboxGpuConfig = {
   sandboxGpuEnabled: boolean;
   sandboxGpuDevice: string | null;
   errors: string[];
+  // Outcome of the live direct sandbox GPU proof, populated after onboarding
+  // runs the verifier so it can be persisted to the registry (#4231). Absent
+  // until the proof runs; never overwrites a stored proof on reuse paths.
+  sandboxGpuProof?: SandboxGpuProofResult | null;
 };
 
 export type ResumeSandboxGpuOverrides = {
diff --git a/src/lib/onboard/sandbox-gpu-preflight.test.ts b/src/lib/onboard/sandbox-gpu-preflight.test.ts
index b42a790a1f..193306879d 100644
--- a/src/lib/onboard/sandbox-gpu-preflight.test.ts
+++ b/src/lib/onboard/sandbox-gpu-preflight.test.ts
@@ -156,25 +156,118 @@ describe("sandbox GPU preflight", () => {
     );
   });
 
-  it("treats optional direct sandbox GPU proof failures as non-fatal", () => {
+  it("treats optional direct sandbox GPU proof failures as non-fatal and reports unverified", () => {
     const runOpenshell = vi.fn(() => ({ status: 1, stdout: "", stderr: "optional proof failed" }));
     const verifier = createDirectSandboxGpuVerifier({
       runOpenshell,
+      detectNvidiaPlatform: () => "linux",
       buildDirectSandboxGpuProofCommands: vi.fn(() => [
-        { args: ["sandbox", "exec", "demo", "--", "nvidia-smi"], label: "nvidia-smi", optional: true },
-        { args: ["sandbox", "exec", "demo", "--", "false"], label: "fatal proof" },
+        { id: "nvidia-smi", args: ["sandbox", "exec", "demo", "--", "nvidia-smi"], label: "nvidia-smi", optional: true },
+        { id: "cuda-init", args: ["sandbox", "exec", "demo", "--", "false"], label: "cuda-init", optional: true },
+      ]),
+      compactText: (value) => value.trim(),
+      redact: (value) => String(value),
+    });
+
+    let result: ReturnType<typeof verifier> | undefined;
+    expect(() => {
+      result = verifier("demo");
+    }).not.toThrow();
+    // Optional failures no longer short-circuit; every optional proof runs so
+    // the CUDA-usability outcome is observed rather than swallowed (#4231).
+    expect(runOpenshell).toHaveBeenCalledTimes(2);
+    expect(result?.status).toBe("unverified");
+    expect(result?.cudaVerified).toBe(false);
+  });
+
+  it("reports failed when the CUDA usability proof reaches the driver and fails (#4231)", () => {
+    const verifier = createDirectSandboxGpuVerifier({
+      runOpenshell: vi.fn((args: string[]) => {
+        if (args.includes("cuda-init-cmd")) {
+          return { status: 1, stdout: "cuInit(0)=999", stderr: "" };
+        }
+        return { status: 0, stdout: "", stderr: "" };
+      }),
+      detectNvidiaPlatform: () => "jetson",
+      buildDirectSandboxGpuProofCommands: vi.fn(() => [
+        { id: "nvidia-smi", args: ["sandbox", "exec", "demo", "--", "nvidia-smi"], label: "nvidia-smi" },
+        { id: "cuda-init", args: ["sandbox", "exec", "demo", "--", "cuda-init-cmd"], label: "cuInit(0)", optional: true },
+      ]),
+      compactText: (value) => value.trim(),
+      redact: (value) => String(value),
+    });
+
+    const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => undefined);
+    try {
+      const result = verifier("demo");
+      expect(result.status).toBe("failed");
+      expect(result.cudaVerified).toBe(false);
+      expect(result.detail).toContain("cuInit(0)=999");
+      const warnings = warnSpy.mock.calls.map((call) => call[0]).join("\n");
+      expect(warnings).toContain("/dev/nvmap");
+    } finally {
+      warnSpy.mockRestore();
+    }
+  });
+
+  it("reports verified when the CUDA usability proof passes", () => {
+    const verifier = createDirectSandboxGpuVerifier({
+      runOpenshell: vi.fn(() => ({ status: 0, stdout: "cuInit(0)=0", stderr: "" })),
+      detectNvidiaPlatform: () => "linux",
+      buildDirectSandboxGpuProofCommands: vi.fn(() => [
+        { id: "cuda-init", args: ["sandbox", "exec", "demo", "--", "cuda"], label: "cuInit(0)", optional: true },
+      ]),
+      compactText: (value) => value.trim(),
+      redact: (value) => String(value),
+    });
+
+    const result = verifier("demo");
+    expect(result.status).toBe("verified");
+    expect(result.cudaVerified).toBe(true);
+  });
+
+  it("does not report verified when cuda-init exits 0 without the cuInit marker", () => {
+    // A zero exit that never printed `cuInit(0)=` (e.g. a wrapper that swallowed
+    // the real exit code) must not be trusted as CUDA-verified.
+    const verifier = createDirectSandboxGpuVerifier({
+      runOpenshell: vi.fn(() => ({ status: 0, stdout: "", stderr: "" })),
+      detectNvidiaPlatform: () => "linux",
+      buildDirectSandboxGpuProofCommands: vi.fn(() => [
+        { id: "cuda-init", args: ["sandbox", "exec", "demo", "--", "cuda"], label: "cuInit(0)", optional: true },
+      ]),
+      compactText: (value) => value.trim(),
+      redact: (value) => String(value),
+    });
+
+    const result = verifier("demo");
+    expect(result.status).toBe("unverified");
+    expect(result.cudaVerified).toBe(false);
+  });
+
+  it("treats a zero exit with a non-zero cuInit code as failed, not verified (#4231)", () => {
+    // A wrapper that swallows the probe's non-zero exit but still prints a
+    // non-zero `cuInit(0)=<err>` reached the driver and CUDA failed; it must not
+    // read as verified just because the process exited 0.
+    const verifier = createDirectSandboxGpuVerifier({
+      runOpenshell: vi.fn(() => ({ status: 0, stdout: "cuInit(0)=999", stderr: "" })),
+      detectNvidiaPlatform: () => "linux",
+      buildDirectSandboxGpuProofCommands: vi.fn(() => [
+        { id: "cuda-init", args: ["sandbox", "exec", "demo", "--", "cuda"], label: "cuInit(0)", optional: true },
       ]),
       compactText: (value) => value.trim(),
       redact: (value) => String(value),
     });
 
-    expect(() => verifier("demo")).not.toThrow();
-    expect(runOpenshell).toHaveBeenCalledTimes(1);
+    const result = verifier("demo");
+    expect(result.status).toBe("failed");
+    expect(result.cudaVerified).toBe(false);
+    expect(result.detail).toContain("cuInit(0)=999");
   });
 
   it("throws on required direct sandbox GPU proof failures", () => {
     const verifier = createDirectSandboxGpuVerifier({
       runOpenshell: vi.fn(() => ({ status: 1, stdout: "", stderr: "required proof failed" })),
+      detectNvidiaPlatform: () => "linux",
       buildDirectSandboxGpuProofCommands: vi.fn(() => [
         { args: ["sandbox", "exec", "demo", "--", "false"], label: "fatal proof" },
       ]),
@@ -192,6 +285,7 @@ describe("sandbox GPU preflight", () => {
       env: { WSL_DISTRO_NAME: "Ubuntu" },
       dockerInfoFormat: vi.fn(() => '"Docker Desktop"'),
       runOpenshell: vi.fn(() => ({ status: 1, stdout: "", stderr: "required proof failed" })),
+      detectNvidiaPlatform: () => "linux",
       buildDirectSandboxGpuProofCommands: vi.fn(() => [
         { args: ["sandbox", "exec", "demo", "--", "false"], label: "fatal proof" },
       ]),
diff --git a/src/lib/onboard/sandbox-gpu-preflight.ts b/src/lib/onboard/sandbox-gpu-preflight.ts
index d3ea20e369..2c9529b4a4 100644
--- a/src/lib/onboard/sandbox-gpu-preflight.ts
+++ b/src/lib/onboard/sandbox-gpu-preflight.ts
@@ -2,6 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0
 
 import { dockerInfoFormat } from "../adapters/docker";
+import type { GpuDetection } from "../inference/nim";
+import type { SandboxGpuProofResult } from "../state/registry";
 import { findReadableNvidiaCdiSpecFiles, getDockerCdiSpecDirs } from "./docker-cdi";
 import type { SandboxGpuConfig, SandboxGpuFlag } from "./sandbox-gpu-mode";
 import {
@@ -45,6 +47,21 @@ export function resolveSandboxGpuFlagFromOptions(opts: SandboxGpuFlagOptions): S
   return null;
 }
 
+// Remediation hints for a failed CUDA proof on Jetson/Tegra (#4231): point at Tegra
+// device-node access and agent-user groups rather than CDI/runtime misconfiguration.
+export function jetsonGpuProofRemediationLines(): string[] {
+  const deviceChecks = [
+    "  ls -l /dev/nvmap /dev/nvhost-* (must be readable by the sandbox)",
+    "  add the host video/render groups via --group-add when recreating",
+  ];
+  return [
+    "Jetson/Tegra CUDA proof did not pass. CUDA needs access to the Tegra device",
+    "nodes; confirm the sandbox propagates them and the agent user's groups:",
+    ...deviceChecks,
+    "Then recreate the sandbox, or force CPU behavior with NEMOCLAW_SANDBOX_GPU=0.",
+  ];
+}
+
 export function sandboxGpuRemediationLines(
   options: { wslDockerDesktop?: boolean; wslDockerDesktopStatus?: WslDockerDesktopStatus } = {},
 ): string[] {
@@ -143,43 +160,136 @@ export interface DirectSandboxGpuVerifierDeps extends WslDockerDesktopDetectionD
     opts?: Record<string, unknown>,
   ): { status?: number | null; stdout?: unknown; stderr?: unknown };
   buildDirectSandboxGpuProofCommands?: (sandboxName: string) => Array<{
+    id?: string;
     args: string[];
     label: string;
     optional?: boolean;
   }>;
   compactText(value: string): string;
   redact(value: unknown): string;
+  // Host firmware platform resolver, used to choose Jetson-specific remediation
+  // when a CUDA proof fails. Defaults to the live `nim.detectNvidiaPlatform()`
+  // so onboarding does not have to thread the platform through. Injected in
+  // tests to exercise the Jetson path without Jetson firmware.
+  detectNvidiaPlatform?: () => GpuDetection["platform"] | null;
 }
 
-export function createDirectSandboxGpuVerifier(deps: DirectSandboxGpuVerifierDeps) {
-  return function verifyDirectSandboxGpu(sandboxName: string): void {
+// Id of the proof whose result decides CUDA usability. `cuInit(0)` via libcuda
+// actually initializes the CUDA driver, so a clean pass means "verified" and a
+// run that reaches the driver and fails means "failed", not merely "unverified".
+const CUDA_USABILITY_PROOF_ID = "cuda-init";
+// Captures the cuInit(0) return code so "verified" can require it to be 0. The
+// marker text alone is not enough: a wrapper that swallows the probe's non-zero
+// exit but still prints `cuInit(0)=<err>` would otherwise read as verified (#4231).
+const CUDA_INIT_RESULT_PATTERN = /cuInit\(0\)=(-?\d+)/;
+
+export type VerifyDirectSandboxGpu = (
+  sandboxName: string,
+  hostGpuPlatform?: GpuDetection["platform"] | null,
+) => SandboxGpuProofResult;
+
+/**
+ * Builds the direct sandbox GPU verifier. The returned function runs each proof
+ * command, throws if a required (non-optional) proof exits non-zero, and returns
+ * the CUDA proof state: "verified", "failed", or "unverified".
+ */
+export function createDirectSandboxGpuVerifier(
+  deps: DirectSandboxGpuVerifierDeps,
+): VerifyDirectSandboxGpu {
+  return function verifyDirectSandboxGpu(
+    sandboxName: string,
+    hostGpuPlatform?: GpuDetection["platform"] | null,
+  ): SandboxGpuProofResult {
     console.log("  Verifying direct sandbox GPU access...");
+    // Resolved lazily: the platform only selects remediation text for a failed
+    // CUDA proof, so host detection is skipped when no CUDA failure is recorded.
+    const resolvePlatform = (): GpuDetection["platform"] | null =>
+      hostGpuPlatform !== undefined
+        ? hostGpuPlatform
+        : (deps.detectNvidiaPlatform ?? require("../inference/nim").detectNvidiaPlatform)();
     const buildProofCommands =
       deps.buildDirectSandboxGpuProofCommands ??
       require("./initial-policy").buildDirectSandboxGpuProofCommands;
+    let cudaVerified = false;
+    // Set when a CUDA-usability proof reached the driver and failed (vs. never ran).
+    let cudaFailure: { label: string; detail: string } | null = null;
     for (const proof of buildProofCommands(sandboxName)) {
       const result = deps.runOpenshell(proof.args, {
         ignoreError: true,
         suppressOutput: true,
         timeout: 30_000,
       });
+      // Match the cuInit marker against the FULL redacted output; the 300-char
+      // cut below is display/storage only, so verbosity cannot hide the marker.
+      const rawOutput = deps.redact(`${result.stderr || ""} ${result.stdout || ""}`);
+      const cudaInitMatch = rawOutput.match(CUDA_INIT_RESULT_PATTERN);
+      const cudaInitRan = cudaInitMatch !== null;
+      // Only `cuInit(0)=0` proves usability; any other code means init failed.
+      const cudaInitSucceeded = cudaInitMatch?.[1] === "0";
+      const diagnostic = deps.compactText(rawOutput).slice(0, 300);
       if (result.status === 0) {
         console.log(`  ✓ GPU proof passed: ${proof.label}`);
+        if (proof.id === CUDA_USABILITY_PROOF_ID && cudaInitRan) {
+          // A zero exit alone is not proof: without the marker the run stays
+          // unverified, and a swallowed non-zero exit that still printed a
+          // non-zero cuInit code is recorded as a failure.
+          if (cudaInitSucceeded) {
+            cudaVerified = true;
+          } else {
+            cudaFailure = { label: proof.label, detail: diagnostic };
+          }
+        }
         continue;
       }
-      if (proof.optional === true) return;
-      const diagnostic = deps.compactText(deps.redact(`${result.stderr || ""} ${result.stdout || ""}`));
-      console.error(`  ✗ GPU proof failed: ${proof.label}`);
-      if (diagnostic) console.error(`    ${diagnostic.slice(0, 300)}`);
-      for (const line of sandboxGpuRemediationLines({
-        wslDockerDesktopStatus: detectWslDockerDesktopStatus(deps),
-      })) {
-        console.error(`    ${line}`);
+      if (proof.optional !== true) {
+        // Required proof (e.g. the sandbox-exec wrapper itself): keep the
+        // historical hard-fail so onboarding aborts and rolls back.
+        console.error(`  ✗ GPU proof failed: ${proof.label}`);
+        if (diagnostic) console.error(`    ${diagnostic}`);
+        for (const line of sandboxGpuRemediationLines({
+          wslDockerDesktopStatus: detectWslDockerDesktopStatus(deps),
+        })) {
+          console.error(`    ${line}`);
+        }
+        const statusText = String(result.status || 1);
+        const diagnosticSuffix = diagnostic ? `: ${diagnostic}` : "";
+        throw new Error(`GPU proof failed: ${proof.label} (status ${statusText})${diagnosticSuffix}`);
       }
-      const statusText = String(result.status || 1);
-      const diagnosticSuffix = diagnostic ? `: ${diagnostic.slice(0, 300)}` : "";
-      throw new Error(`GPU proof failed: ${proof.label} (status ${statusText})${diagnosticSuffix}`);
+      // Optional proof failure is non-fatal but not swallowed: a CUDA-usability
+      // proof that reached the driver and failed marks the GPU proven-unusable so
+      // `status` can report it instead of "enabled" (#4231, Jetson /dev/nvmap).
+      if (proof.id === CUDA_USABILITY_PROOF_ID && cudaInitRan) {
+        cudaFailure = { label: proof.label, detail: diagnostic };
+      }
+      console.warn(`  ⚠ GPU proof inconclusive: ${proof.label}`);
+      if (diagnostic) console.warn(`    ${diagnostic}`);
+    }
+    const status: SandboxGpuProofResult["status"] = cudaVerified
+      ? "verified"
+      : cudaFailure
+        ? "failed"
+        : "unverified";
+    if (status === "verified") {
+      console.log("  ✓ Sandbox CUDA usability proven (cuInit succeeded).");
+    } else if (status === "failed") {
+      console.warn(`  ⚠ Sandbox CUDA proof failed: ${cudaFailure?.label}`);
+      const lines =
+        resolvePlatform() === "jetson"
+          ? jetsonGpuProofRemediationLines()
+          : sandboxGpuRemediationLines({ wslDockerDesktopStatus: detectWslDockerDesktopStatus(deps) });
+      for (const line of lines) console.warn(`    ${line}`);
+    } else {
+      console.warn(
+        "  ⚠ Sandbox GPU enabled but CUDA usability is unverified (no CUDA proof ran).",
+      );
     }
+    return {
+      status,
+      cudaVerified,
+      label: cudaFailure?.label ?? null,
+      detail: cudaFailure?.detail ?? null,
+      at: new Date().toISOString(),
+    };
   };
 }
 
diff --git a/src/lib/onboard/sandbox-registry-metadata.ts b/src/lib/onboard/sandbox-registry-metadata.ts
index a14429d03f..4242554657 100644
--- a/src/lib/onboard/sandbox-registry-metadata.ts
+++ b/src/lib/onboard/sandbox-registry-metadata.ts
@@ -21,6 +21,7 @@ export interface SandboxRegistryMetadataHelpers {
     | "sandboxGpuEnabled"
     | "sandboxGpuMode"
     | "sandboxGpuDevice"
+    | "sandboxGpuProof"
     | "openshellDriver"
     | "openshellVersion"
   >;
@@ -46,6 +47,7 @@ export function createSandboxRegistryMetadataHelpers(
     | "sandboxGpuEnabled"
     | "sandboxGpuMode"
     | "sandboxGpuDevice"
+    | "sandboxGpuProof"
     | "openshellDriver"
     | "openshellVersion"
   > {
@@ -59,6 +61,9 @@ export function createSandboxRegistryMetadataHelpers(
       sandboxGpuEnabled: config.sandboxGpuEnabled,
       sandboxGpuMode: config.mode,
       sandboxGpuDevice: config.sandboxGpuDevice,
+      // Only persist a proof when this run produced one; omit on reuse/update
+      // paths so a prior proof result is preserved rather than nulled out.
+      ...(config.sandboxGpuProof ? { sandboxGpuProof: config.sandboxGpuProof } : {}),
       openshellDriver: deps.isLinuxDockerDriverGatewayEnabled() ? "docker" : "kubernetes",
       openshellVersion: deps.getInstalledOpenshellVersion(
         deps.runCaptureOpenshell(["--version"], { ignoreError: true }),
diff --git a/src/lib/onboard/wsl-docker-desktop-gpu.test.ts b/src/lib/onboard/wsl-docker-desktop-gpu.test.ts
index e0103f978b..32c482cad1 100644
--- a/src/lib/onboard/wsl-docker-desktop-gpu.test.ts
+++ b/src/lib/onboard/wsl-docker-desktop-gpu.test.ts
@@ -8,11 +8,15 @@ vi.mock("../adapters/docker", () => ({
 }));
 
 import {
+  createArm64WslDockerDesktopGpuProver,
   detectWslDockerDesktopStatus,
+  isExecFormatErrorDiagnostic,
   isWslDockerDesktopRuntime,
   WSL_DOCKER_DESKTOP_GPU_COMPATIBILITY_REMOVAL_CONDITION,
+  WSL_DOCKER_DESKTOP_GPU_PROOF_COMMAND,
   wslDockerDesktopGpuCompatibilityAction,
   wslDockerDesktopGpuCompatibilityRemediationLines,
+  wslDockerDesktopGpuProofTimeoutMs,
 } from "./wsl-docker-desktop-gpu";
 
 describe("WSL Docker Desktop GPU compatibility helpers", () => {
@@ -64,3 +68,109 @@ describe("WSL Docker Desktop GPU compatibility helpers", () => {
     expect(WSL_DOCKER_DESKTOP_GPU_COMPATIBILITY_REMOVAL_CONDITION).toContain("Remove");
   });
 });
+
+describe("createArm64WslDockerDesktopGpuProver (#4565)", () => {
+  const passingProof = { passed: true, timedOut: false, exitCode: 0, diagnostic: "" }; // shared result returned by the stubbed runProof
+
+  it("returns null on non-ARM64 hosts without running the proof", () => {
+    const runProof = vi.fn(() => passingProof);
+    const prover = createArm64WslDockerDesktopGpuProver({
+      platform: "linux",
+      arch: "x64",
+      detectWslDockerDesktopStatus: () => "docker-desktop",
+      runProof,
+      log: () => undefined,
+    });
+    expect(prover(["JMJWOA-Generic-GPU"])).toBeNull();
+    expect(runProof).not.toHaveBeenCalled();
+  });
+
+  it("returns null when the host is not Docker Desktop-backed WSL", () => {
+    const runProof = vi.fn(() => passingProof);
+    const prover = createArm64WslDockerDesktopGpuProver({
+      platform: "linux",
+      arch: "arm64",
+      detectWslDockerDesktopStatus: () => "not-docker-desktop",
+      runProof,
+      log: () => undefined,
+    });
+    expect(prover(["JMJWOA-Generic-GPU"])).toBeNull();
+    expect(runProof).not.toHaveBeenCalled();
+  });
+
+  it("runs the bounded proof and reports the result on ARM64 Docker Desktop WSL", () => {
+    const runProof = vi.fn((_argv: string[], _timeoutMs: number) => passingProof);
+    const prover = createArm64WslDockerDesktopGpuProver({
+      platform: "linux",
+      arch: "arm64",
+      detectWslDockerDesktopStatus: () => "docker-desktop",
+      runProof,
+      log: () => undefined,
+    });
+    expect(prover(["JMJWOA-Generic-GPU"])).toEqual(passingProof);
+    expect(runProof).toHaveBeenCalledTimes(1);
+    const argv = runProof.mock.calls[0]?.[0] ?? []; // argv handed to the first (only) runProof call
+    expect(argv[0]).toBe("docker");
+    expect(argv).toContain("--gpus");
+  });
+
+  it("uses an arch-correct CUDA sample image (not the amd64-only nbody) on this ARM64 path", () => {
+    // The proof only runs on ARM64, so the image must ship a real aarch64 CUDA
+    // binary. `cuda-sample:nbody` packs an x86-64 binary in its arm64 tag and
+    // fails with `exec format error` on the N1X target (#4565); the chosen
+    // vectorAdd image ships a genuine aarch64 binary.
+    expect(WSL_DOCKER_DESKTOP_GPU_PROOF_COMMAND).toContain("cuda-sample:vectoradd");
+    expect(WSL_DOCKER_DESKTOP_GPU_PROOF_COMMAND).not.toContain("nbody");
+  });
+
+  it("propagates a failing proof so detection stays fail-closed", () => {
+    const failing = { passed: false, timedOut: false, exitCode: 1, diagnostic: "no CUDA device" };
+    const prover = createArm64WslDockerDesktopGpuProver({
+      platform: "linux",
+      arch: "arm64",
+      detectWslDockerDesktopStatus: () => "docker-desktop",
+      runProof: () => failing,
+      log: () => undefined,
+    });
+    expect(prover(["JMJWOA-Generic-GPU"])?.passed).toBe(false);
+  });
+
+  it("flags an exec-format-error proof as an image-arch problem, not a missing GPU (#4565)", () => {
+    const execFormatFailure = {
+      passed: false,
+      timedOut: false,
+      exitCode: 1,
+      diagnostic: "exec /cuda-samples/sample: exec format error",
+    };
+    const logs: string[] = []; // collects every message the prover logs
+    const prover = createArm64WslDockerDesktopGpuProver({
+      platform: "linux",
+      arch: "arm64",
+      detectWslDockerDesktopStatus: () => "docker-desktop",
+      runProof: () => execFormatFailure,
+      log: (message) => logs.push(message),
+    });
+    // Still fail-closed (no false positive), but the operator-facing message
+    // must distinguish an image-architecture bug from a missing GPU.
+    expect(prover(["JMJWOA-Generic-GPU"])?.passed).toBe(false);
+    const combined = logs.join("\n");
+    expect(combined).toContain("architecture");
+    expect(combined).not.toContain("treating GPU as unproven"); // wording used only by the timeout / generic-failure messages
+  });
+
+  it("honors a positive NEMOCLAW_WSL_GPU_PROOF_TIMEOUT_MS override", () => {
+    expect(wslDockerDesktopGpuProofTimeoutMs({ NEMOCLAW_WSL_GPU_PROOF_TIMEOUT_MS: "5000" })).toBe(5000);
+    expect(wslDockerDesktopGpuProofTimeoutMs({})).toBeGreaterThan(0); // unset: falls back to the default
+    expect(wslDockerDesktopGpuProofTimeoutMs({ NEMOCLAW_WSL_GPU_PROOF_TIMEOUT_MS: "-1" })).toBeGreaterThan(
+      0,
+    );
+  });
+
+  it("detects Docker exec-format-error diagnostics", () => {
+    expect(isExecFormatErrorDiagnostic("exec /cuda-samples/sample: exec format error")).toBe(true);
+    expect(isExecFormatErrorDiagnostic("standard_init_linux.go: exec format error")).toBe(true);
+    expect(isExecFormatErrorDiagnostic("no CUDA-capable device is detected")).toBe(false);
+    expect(isExecFormatErrorDiagnostic(null)).toBe(false);
+    expect(isExecFormatErrorDiagnostic(undefined)).toBe(false);
+  });
+});
diff --git a/src/lib/onboard/wsl-docker-desktop-gpu.ts b/src/lib/onboard/wsl-docker-desktop-gpu.ts
index a8870bbac3..c30e6b43c4 100644
--- a/src/lib/onboard/wsl-docker-desktop-gpu.ts
+++ b/src/lib/onboard/wsl-docker-desktop-gpu.ts
@@ -3,12 +3,38 @@
 
 import fs from "node:fs";
 import os from "node:os";
-
 import { dockerInfoFormat as defaultDockerInfoFormat } from "../adapters/docker";
+import type {
+  Arm64WslDockerDesktopGpuProver,
+  DockerGpuProofResult,
+} from "../inference/gpu-trust";
 
 const WSL_DOCKER_DESKTOP_DETECTION_TIMEOUT_MS = 30_000;
+// This prover only runs on ARM64 (see `createArm64WslDockerDesktopGpuProver`), so the
+// proof image MUST ship a real aarch64 CUDA binary. The older `cuda-sample:nbody` image
+// is unusable: its arm64 manifest entry actually contains an x86-64 ELF, so the N1X
+// Windows-ARM target fails with `exec /cuda-samples/sample: exec format error` (#4565).
+// `vectoradd-cuda12.5.0` ships a genuine aarch64 binary and runs a real CUDA kernel
+// (device alloc + add + result verification): a strong usability proof that still fails
+// closed on the Snapdragon nvidia-smi shim (no usable CUDA device, #3988). The image's
+// entrypoint runs vectorAdd directly, so no trailing args are needed. Keep the command
+// quote-free: it is split on whitespace into an argv rather than run through a shell.
 export const WSL_DOCKER_DESKTOP_GPU_PROOF_COMMAND =
-  "docker run --rm --gpus all nvcr.io/nvidia/k8s/cuda-sample:nbody nbody -gpu -benchmark";
+  "docker run --rm --gpus all nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0";
+
+// The proof runs a real CUDA workload and may first pull the CUDA sample image,
+// so it is bounded generously (3 min) rather than with the 30s detection
+// timeout; it keeps onboarding from hanging if GPU passthrough stalls.
+const WSL_DOCKER_DESKTOP_GPU_PROOF_DEFAULT_TIMEOUT_MS = 180_000;
+
+// Proof timeout in ms: a positive NEMOCLAW_WSL_GPU_PROOF_TIMEOUT_MS override
+// (fractions are floored, since Node's child_process only accepts integer
+// timeouts), otherwise the default above for unset, non-numeric or < 1 values.
+export function wslDockerDesktopGpuProofTimeoutMs(env: NodeJS.ProcessEnv = process.env): number {
+  const requested = Math.floor(Number(env.NEMOCLAW_WSL_GPU_PROOF_TIMEOUT_MS));
+  if (Number.isFinite(requested) && requested >= 1) return requested;
+  return WSL_DOCKER_DESKTOP_GPU_PROOF_DEFAULT_TIMEOUT_MS;
+}
 
 // Source-of-truth for this compatibility branch: Docker Desktop-backed WSL can
 // advertise Docker CDI directories while the WSL distro cannot see a usable
@@ -115,6 +141,100 @@ export function wslDockerDesktopGpuCompatibilityRemediationLines(
   return null;
 }
 
+export type Arm64WslDockerDesktopGpuProverDeps = WslDockerDesktopDetectionDeps & {
+  arch?: string; // CPU architecture override; the prover falls back to process.arch
+  detectWslDockerDesktopStatus?: (deps: WslDockerDesktopDetectionDeps) => WslDockerDesktopStatus; // status seam; defaults to this module's detectWslDockerDesktopStatus
+  runProof?: (argv: string[], timeoutMs: number) => DockerGpuProofResult; // proof runner seam; defaults to runWslDockerDesktopGpuProof
+  log?: (message: string) => void; // progress sink; defaults to console.log
+};
+
+// Tokenize the fixed, repo-controlled proof command into an argv so the bounded
+// proof never goes through a shell. The command has no quoting, so taking each
+// run of non-whitespace characters as one argument is exact.
+function wslDockerDesktopGpuProofArgv(): string[] {
+  return WSL_DOCKER_DESKTOP_GPU_PROOF_COMMAND.match(/\S+/g) ?? [];
+}
+
+// Docker surfaces a proof image built for a different CPU than the host as
+// `exec ...: exec format error`. On this ARM64-only path that is an image
+// packaging bug, not a "no GPU" condition, so it is detected separately (#4565).
+export function isExecFormatErrorDiagnostic(diagnostic: string | null | undefined): boolean {
+  if (typeof diagnostic !== "string") return false;
+  return /exec format error/i.test(diagnostic);
+}
+
+// Runs the proof argv through `runCaptureEx` with the given timeout and maps the
+// outcome to a DockerGpuProofResult. The diagnostic is capped at 300 characters,
+// and anything thrown along the way is reported as a failed, non-timeout proof.
+function runWslDockerDesktopGpuProof(argv: string[], timeoutMs: number): DockerGpuProofResult {
+  const clip = (text: string): string => text.slice(0, 300);
+  try {
+    // Required lazily so importing this onboard module does not statically pull
+    // in the runner (and its transitive platform require).
+    const runner = require("../runner") as typeof import("../runner");
+    const captured = runner.runCaptureEx(argv, { timeout: timeoutMs });
+    // stderr carries Docker daemon and CUDA-sample failure text, so it wins;
+    // stdout (vectorAdd output) is the fallback when stderr is empty.
+    const output = captured.stderr || captured.stdout;
+    return {
+      passed: captured.exitCode === 0 && !captured.timedOut,
+      timedOut: captured.timedOut,
+      exitCode: captured.exitCode,
+      diagnostic: clip(output),
+    };
+  } catch (err) {
+    // The require or runCaptureEx threw: surface its message as the diagnostic.
+    const message = err instanceof Error ? err.message : String(err);
+    return { passed: false, timedOut: false, exitCode: null, diagnostic: clip(message) };
+  }
+}
+
+// Factory for the ARM64 WSL Docker Desktop GPU prover that `detectGpu()` consults
+// for denylisted `JMJWOA-Generic-*` names (#4565). The prover yields `null` on any
+// host that is not ARM64 Linux on Docker Desktop-backed WSL, which leaves the
+// #3988/#4424 fail-closed default untouched there. On a candidate host it runs a
+// single bounded Docker `--gpus` CUDA workload (the aarch64 vectorAdd sample) and
+// returns that result: a real N1X GPU passes, while the Snapdragon nvidia-smi
+// shim has no usable CUDA device, so the placeholder name alone is never trusted.
+export function createArm64WslDockerDesktopGpuProver(
+  deps: Arm64WslDockerDesktopGpuProverDeps = {},
+): Arm64WslDockerDesktopGpuProver {
+  const emit = deps.log ?? ((message: string) => console.log(message));
+  const probeStatus = deps.detectWslDockerDesktopStatus ?? detectWslDockerDesktopStatus;
+  const executeProof = deps.runProof ?? runWslDockerDesktopGpuProof;
+  return function proveArm64WslDockerDesktopGpu(
+    gpuNames: string[],
+  ): DockerGpuProofResult | null {
+    const isArm64Linux =
+      (deps.platform ?? process.platform) === "linux" && (deps.arch ?? process.arch) === "arm64";
+    if (!isArm64Linux || probeStatus(deps) !== "docker-desktop") return null;
+    const gpuLabel = gpuNames.filter(Boolean).join(", ") || "generic ARM64 GPU";
+    emit(
+      `  Running bounded Docker Desktop WSL GPU proof for ${gpuLabel} (may pull a CUDA sample image)...`,
+    );
+    emit(`    ${WSL_DOCKER_DESKTOP_GPU_PROOF_COMMAND}`);
+    const outcome = executeProof(wslDockerDesktopGpuProofArgv(), wslDockerDesktopGpuProofTimeoutMs(deps.env));
+    const report: string[] = [];
+    if (outcome.passed) {
+      report.push("  ✓ Docker Desktop WSL GPU proof passed; trusting the reported GPU.");
+    } else if (outcome.timedOut) {
+      report.push("  ✗ Docker Desktop WSL GPU proof timed out; treating GPU as unproven (CPU fallback).");
+      report.push("    Rerun with --no-gpu to skip GPU passthrough, or raise NEMOCLAW_WSL_GPU_PROOF_TIMEOUT_MS.");
+    } else if (isExecFormatErrorDiagnostic(outcome.diagnostic)) {
+      // An arch-mismatched proof binary is an image problem, not a GPU problem, so
+      // it is called out explicitly instead of reading like a missing GPU (#4565).
+      report.push("  ✗ Docker Desktop WSL GPU proof could not run: CUDA sample image architecture does not");
+      report.push("    match this host (exec format error). This is a proof-image issue, not a missing GPU.");
+      report.push("    Rerun with --no-gpu to skip GPU passthrough, or report this so the proof image can be fixed.");
+    } else {
+      report.push("  ✗ Docker Desktop WSL GPU proof failed; treating GPU as unproven (CPU fallback).");
+      report.push("    Rerun with --no-gpu to skip GPU passthrough.");
+    }
+    for (const line of report) emit(line);
+    return outcome;
+  };
+}
+
 export function wslDockerDesktopGpuCompatibilityAction(): WslDockerDesktopGpuCompatibilityAction {
   return {
     id: "wsl_docker_desktop_gpu_compatibility",
diff --git a/src/lib/runner.ts b/src/lib/runner.ts
index bda565a579..1503eed20a 100644
--- a/src/lib/runner.ts
+++ b/src/lib/runner.ts
@@ -265,6 +265,10 @@ const { redact, redactError, writeRedactedResult } = require("./security/redact"
 /** Structured result returned by runCaptureEx. */
 export interface CaptureResult {
   stdout: string;
+  /** Captured stderr, trimmed. Many tools (docker, CUDA samples) write their
+   * actionable failure text here, so callers building diagnostics need it.
+   * Optional so existing `runCaptureEx` test seams stay source-compatible. */
+  stderr?: string;
   exitCode: number | null;
   /** True when spawnSync sets result.error due to a timeout (ETIMEDOUT). */
   timedOut: boolean;
@@ -299,8 +303,10 @@ function runCaptureEx(cmd: readonly string[], opts: Omit<CaptureOptions, "ignore
       (result.error != null && (result.error as NodeJS.ErrnoException).code === "ETIMEDOUT") ||
       result.status === 28;
     const stdout = result.stdout || "";
+    const stderr = result.stderr || "";
     return {
       stdout: (typeof stdout === "string" ? stdout : stdout.toString("utf-8")).trim(),
+      stderr: (typeof stderr === "string" ? stderr : stderr.toString("utf-8")).trim(),
       exitCode: result.status,
       timedOut,
     };
diff --git a/src/lib/state/registry.ts b/src/lib/state/registry.ts
index d1c69e5deb..fc9ce25b18 100644
--- a/src/lib/state/registry.ts
+++ b/src/lib/state/registry.ts
@@ -3,10 +3,9 @@
 
 import fs from "node:fs";
 import path from "node:path";
-
-import { ensureConfigDir, readConfigFile, writeConfigFile } from "./config-io";
 import { isErrnoException } from "../core/errno";
 import type { MessagingChannelConfig } from "../messaging-channel-config";
+import { ensureConfigDir, readConfigFile, writeConfigFile } from "./config-io";
 
 export interface CustomPolicyEntry {
   name: string;
@@ -15,6 +14,25 @@ export interface CustomPolicyEntry {
   appliedAt?: string;
 }
 
+// Outcome of the last live sandbox GPU proof run during onboarding/recovery.
+// `status` separates a configured-but-unverified GPU from one whose CUDA
+// usability was actually proven (`verified`) or actively failed a live proof
+// (`failed`, e.g. Jetson `/dev/nvmap` permission errors). Persisted so
+// `nemoclaw <sandbox> status` can report proof state instead of treating any
+// configured GPU as healthy (#4231).
+export type SandboxGpuProofStatus = "verified" | "unverified" | "failed";
+
+export interface SandboxGpuProofResult {
+  status: SandboxGpuProofStatus;
+  // True only when a CUDA-usability proof (cuInit via libcuda) actually passed.
+  cudaVerified: boolean;
+  // Label of the CUDA proof that failed; the direct GPU verifier writes null otherwise.
+  label?: string | null;
+  // Redacted, truncated diagnostic captured when the proof failed.
+  detail?: string | null;
+  at: string; // ISO-8601 timestamp of the proof run (the verifier uses `new Date().toISOString()`)
+}
+
 export interface SandboxEntry {
   name: string;
   createdAt?: string;
@@ -26,6 +44,7 @@ export interface SandboxEntry {
   sandboxGpuEnabled?: boolean;
   sandboxGpuMode?: "auto" | "1" | "0" | string | null;
   sandboxGpuDevice?: string | null;
+  sandboxGpuProof?: SandboxGpuProofResult | null;
   openshellDriver?: string | null;
   openshellVersion?: string | null;
   policies?: string[];
@@ -218,6 +237,7 @@ export function registerSandbox(entry: SandboxEntry): void {
       sandboxGpuEnabled: entry.sandboxGpuEnabled === true,
       sandboxGpuMode: entry.sandboxGpuMode || null,
       sandboxGpuDevice: entry.sandboxGpuDevice || null,
+      sandboxGpuProof: entry.sandboxGpuProof ?? null,
       openshellDriver: entry.openshellDriver || null,
       openshellVersion: entry.openshellVersion || null,
       policies: entry.policies || [],
diff --git a/test/e2e/test-gpu-e2e.sh b/test/e2e/test-gpu-e2e.sh
index bda0b4a29d..a6d7af0589 100755
--- a/test/e2e/test-gpu-e2e.sh
+++ b/test/e2e/test-gpu-e2e.sh
@@ -285,6 +285,16 @@ if status_output=$(nemoclaw "$SANDBOX_NAME" status 2>&1); then
   else
     fail "Sandbox GPU is not enabled in status output"
   fi
+  # #4231: status must report proven CUDA usability, not a bare "enabled". On a
+  # working GPU host the onboarding cuInit proof passes, so status should carry
+  # the "(CUDA verified)" suffix rather than "(CUDA unverified)" or a failure.
+  if echo "$status_output" | grep -Fq "CUDA verified"; then
+    pass "Sandbox GPU status reports CUDA verified"
+  elif echo "$status_output" | grep -Eq "CUDA unverified|last CUDA proof failed"; then
+    fail "Sandbox GPU status shows CUDA not proven on a working GPU host"
+  else
+    skip "Sandbox GPU CUDA proof state not present in status output"
+  fi
 else
   fail "Could not read sandbox GPU status"
 fi