Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/lib/actions/sandbox/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ async function autoCreateSandboxFromSource(
// dst has its own lifecycle; don't inherit src's local NIM container
// reference, or destroying dst would stop src's NIM.
nimContainer: null,
// No CUDA proof has run for dst (this auto-create path passes no GPU flags),
// so clear src's proof rather than inheriting it — otherwise dst could show
// `Sandbox GPU: enabled (CUDA verified)` based on another sandbox's run (#4231).
sandboxGpuProof: null,
});

console.log(` ${G}\u2713${R} Sandbox '${dstName}' created`);
Expand Down
6 changes: 5 additions & 1 deletion src/lib/actions/sandbox/status-snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ import {
import { probeSandboxInferenceGatewayHealth } from "./process-recovery";
import {
getSandboxStatusPreflight,
withoutTerminalPhasePreflight,
type SandboxStatusFailureLayer,
withoutTerminalPhasePreflight,
} from "./status-preflight";

type ProbeProviderHealth = (
Expand Down Expand Up @@ -84,6 +84,9 @@ export interface SandboxStatusReport {
sandboxGpuEnabled: boolean;
sandboxGpuMode: string | null;
sandboxGpuDevice: string | null;
// Last recorded CUDA-usability proof so `status` can distinguish a configured
// GPU from a proven-usable one instead of reporting any GPU as healthy (#4231).
sandboxGpuProof: registry.SandboxGpuProofResult | null;
openshellDriver: string;
openshellVersion: string;
policies: string[];
Expand Down Expand Up @@ -222,6 +225,7 @@ export async function getSandboxStatusReport(
sandboxGpuEnabled,
sandboxGpuMode: (sb && sb.sandboxGpuMode) || null,
sandboxGpuDevice: (sb && sb.sandboxGpuDevice) || null,
sandboxGpuProof: (sb && sb.sandboxGpuProof) || null,
openshellDriver: (sb && sb.openshellDriver) || "unknown",
openshellVersion: (sb && sb.openshellVersion) || "unknown",
policies,
Expand Down
40 changes: 38 additions & 2 deletions src/lib/actions/sandbox/status.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
// SPDX-License-Identifier: Apache-2.0

import { describe, expect, it } from "vitest";

import type { ProviderHealthProbeOptions } from "../../../../dist/lib/inference/health";
import {
classifySandboxContainerFailureForStatus,
classifySandboxStatusPreflightFailure,
getSandboxStatusInferenceHealth,
isDockerDaemonUnreachableForStatus,
maybeGetSandboxStatusInferenceHealth,
sandboxGpuProofStatusSuffix,
sandboxGpuProofUnverified,
} from "../../../../dist/lib/actions/sandbox/status";
import type { ProviderHealthProbeOptions } from "../../../../dist/lib/inference/health";

describe("sandbox status inference health", () => {
it("passes the current model with the current provider", () => {
Expand Down Expand Up @@ -276,3 +277,38 @@ describe("classifySandboxStatusPreflightFailure", () => {
expect(result).toBeNull();
});
});

describe("sandbox GPU proof status rendering (#4231)", () => {
  // Shared proof fixtures, one per recorded status, reused by both cases below.
  const verifiedProof = { status: "verified" as const, cudaVerified: true, at: "t" };
  const unverifiedProof = { status: "unverified" as const, cudaVerified: false, at: "t" };
  const failedProof = { status: "failed" as const, cudaVerified: false, at: "t" };

  it("does not call an unproven GPU healthy", () => {
    // A missing proof and an explicit "unverified" proof both count as unproven.
    expect(sandboxGpuProofUnverified(null)).toBe(true);
    expect(sandboxGpuProofUnverified(undefined)).toBe(true);
    expect(sandboxGpuProofUnverified(unverifiedProof)).toBe(true);

    // A recorded outcome (pass or fail) is not "unverified".
    expect(sandboxGpuProofUnverified(verifiedProof)).toBe(false);
    expect(sandboxGpuProofUnverified(failedProof)).toBe(false);
  });

  it("renders verified / unverified / failed suffixes distinctly", () => {
    const verifiedSuffix = sandboxGpuProofStatusSuffix(verifiedProof);
    expect(verifiedSuffix).toContain("CUDA verified");

    // No recorded proof (older entries) must not read as healthy.
    const missingSuffix = sandboxGpuProofStatusSuffix(null);
    expect(missingSuffix).toContain("CUDA unverified");

    const unverifiedSuffix = sandboxGpuProofStatusSuffix(unverifiedProof);
    expect(unverifiedSuffix).toContain("CUDA unverified");

    // A failed proof surfaces both the failure marker and the recorded label.
    const failedSuffix = sandboxGpuProofStatusSuffix({ ...failedProof, label: "cuInit(0)" });
    expect(failedSuffix).toContain("last CUDA proof failed");
    expect(failedSuffix).toContain("cuInit(0)");
  });
});
50 changes: 45 additions & 5 deletions src/lib/actions/sandbox/status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import * as nim from "../../inference/nim";
import * as sandboxVersion from "../../sandbox/version";
import * as shields from "../../shields";
import { isTerminalSandboxPhase, parseSandboxPhase } from "../../state/gateway";
import type { SandboxGpuProofResult } from "../../state/registry";
import * as registry from "../../state/registry";
import {
createSystemDeps as createSessionDeps,
Expand All @@ -28,26 +29,26 @@ import {
printWrongGatewayActiveGuidance,
} from "./gateway-state";
import { isSandboxGatewayRunningForStatus } from "./process-recovery";
import { collectSandboxStatusSnapshot } from "./status-snapshot";
import {
getSandboxStatusPreflight,
printGatewayFailureLayerHeader,
printSandboxStatusPreflightHeader,
withoutTerminalPhasePreflight,
} from "./status-preflight";
import { collectSandboxStatusSnapshot } from "./status-snapshot";

export {
type ClassifySandboxStatusPreflightFailureDeps,
classifySandboxContainerFailureForStatus,
classifySandboxStatusPreflightFailure,
isDockerDaemonUnreachableForStatus,
getSandboxStatusPreflight,
isDockerDaemonUnreachableForStatus,
printGatewayFailureLayerHeader,
printSandboxStatusPreflightHeader,
withoutTerminalPhasePreflight,
type ClassifySandboxStatusPreflightFailureDeps,
type SandboxStatusFailureLayer,
type SandboxStatusPreflightFailure,
type SandboxStatusPreflightResult,
withoutTerminalPhasePreflight,
} from "./status-preflight";
export {
collectSandboxStatusSnapshot,
Expand All @@ -68,6 +69,29 @@ function shouldProbeSandboxRuntimeVersion(
return lookup.state === "present" && Boolean(sandbox.agentVersion);
}

/**
 * Reports whether a sandbox GPU still lacks a CUDA-usability verdict (#4231).
 *
 * A missing proof (older entries with nothing recorded) and a proof whose
 * status is `"unverified"` (the CUDA proof could not execute) are both treated
 * as not-yet-proven rather than healthy.
 *
 * @param proof - The recorded proof, or `null`/`undefined` when none exists.
 * @returns `true` when there is no proof or its status is `"unverified"`;
 *   `false` for any other recorded status.
 */
export function sandboxGpuProofUnverified(
  proof: SandboxGpuProofResult | null | undefined,
): boolean {
  // Nothing recorded at all: cannot be considered proven.
  if (!proof) {
    return true;
  }
  return proof.status === "unverified";
}

/**
 * Builds the coloured proof-state suffix appended to the `Sandbox GPU: enabled`
 * status line, so the line distinguishes verified / unverified / failed CUDA
 * usability instead of presenting every configured GPU as healthy (#4231).
 *
 * @param proof - The recorded proof, or `null`/`undefined` when none exists.
 * @returns A leading-space suffix: "(CUDA verified)" for a verified proof,
 *   "(last CUDA proof failed[: label])" for a failed one, and
 *   "(CUDA unverified)" for everything else, including a missing proof.
 */
export function sandboxGpuProofStatusSuffix(
  proof: SandboxGpuProofResult | null | undefined,
): string {
  const unverifiedSuffix = ` ${YW}(CUDA unverified)${R}`;

  // No recorded proof reads as unverified, never as healthy.
  if (!proof) {
    return unverifiedSuffix;
  }
  if (proof.status === "verified") {
    return ` ${G}(CUDA verified)${R}`;
  }
  if (proof.status !== "failed") {
    return unverifiedSuffix;
  }

  // Failed proof: include the recorded label when there is one.
  const failureLabel = proof.label ? `: ${proof.label}` : "";
  return ` ${RD}(last CUDA proof failed${failureLabel})${R}`;
}

/**
* Render one Inference status line. The main probe and each subprobe go
* through this helper so multi-hop providers (e.g. ollama-local backend +
Expand Down Expand Up @@ -183,10 +207,26 @@ export async function showSandboxStatus(sandboxName: string): Promise<void> {
const sandboxGpu = sandboxGpuEnabled ? "enabled" : "disabled";
const sandboxGpuMode = sb.sandboxGpuMode ? ` (${sb.sandboxGpuMode})` : "";
const sandboxGpuDevice = sb.sandboxGpuDevice ? ` device=${sb.sandboxGpuDevice}` : "";
const sandboxGpuProofSuffix = sandboxGpuEnabled
? sandboxGpuProofStatusSuffix(sb.sandboxGpuProof)
: "";
const openshellDriver = sb.openshellDriver || "unknown";
const openshellVersion = sb.openshellVersion || "unknown";
console.log(` Host GPU: ${hostGpu}`);
console.log(` Sandbox GPU: ${sandboxGpu}${sandboxGpuMode}${sandboxGpuDevice}`);
console.log(
` Sandbox GPU: ${sandboxGpu}${sandboxGpuMode}${sandboxGpuDevice}${sandboxGpuProofSuffix}`,
);
if (sandboxGpuEnabled && sb.sandboxGpuProof?.status === "failed") {
const detail = sb.sandboxGpuProof.detail;
if (detail) console.log(` ${detail}`);
console.log(
" CUDA failed a live proof. Recreate with corrected GPU device/group access, or rerun onboard with --no-gpu.",
);
} else if (sandboxGpuEnabled && sandboxGpuProofUnverified(sb.sandboxGpuProof)) {
console.log(
" CUDA usability has not been proven. Rerun onboard to verify, or use --no-gpu for CPU.",
);
}
console.log(` OpenShell: ${openshellVersion} (${openshellDriver})`);
console.log(` Policies: ${(sb.policies || []).join(", ") || "none"}`);

Expand Down
21 changes: 21 additions & 0 deletions src/lib/inference/gpu-trust.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,27 @@ export function isDenylistedNvidiaGpuName(name: string): boolean {
return NVIDIA_GPU_NAME_DENYLIST_PATTERN.test(name);
}

/**
 * Result of a bounded Docker `--gpus` CUDA proof.
 *
 * `passed` is true only when a real CUDA workload (not just nvidia-smi)
 * succeeded — that is the signal that distinguishes a genuine Windows-ARM
 * N1X + WSL2 + Docker Desktop GPU (#4565) from the Windows-on-ARM Snapdragon
 * nvidia-smi shim (#3988/#4424), which has no usable NVIDIA device and so
 * cannot pass the workload.
 */
export interface DockerGpuProofResult {
// True only when the real CUDA workload succeeded (see the note above).
passed: boolean;
// NOTE(review): presumably set when the bounded proof hit its time limit — confirm against the prover.
timedOut: boolean;
// NOTE(review): presumably the proof process exit code, with null when none was obtained — confirm.
exitCode: number | null;
// NOTE(review): presumably human-readable failure text, empty on success — confirm against the prover.
diagnostic: string;
}

/**
 * Optional accept-path used by `detectGpu()` when an ARM64 Linux host reports a
 * denylisted `JMJWOA-Generic-*` placeholder.
 *
 * The prover returns `null` when the host is not a proof candidate (not ARM64
 * WSL Docker Desktop), preserving the #3988 fail-closed default; otherwise it
 * returns the bounded Docker GPU proof outcome so a passing real GPU can be
 * trusted without trusting the name alone.
 *
 * @param gpuNames - The GPU names reported for the host.
 * @returns The proof outcome, or `null` when the host is not a proof candidate.
 */
export type Arm64WslDockerDesktopGpuProver = (
gpuNames: string[],
) => DockerGpuProofResult | null;

/**
 * Decides whether a reported GPU name can be accepted as an NVIDIA GPU name.
 *
 * @param name - The GPU name to check.
 * @returns `true` only when the name is non-empty, is not on the denylist, and
 *   matches `NVIDIA_GPU_NAME_PATTERN`.
 */
export function isPlausibleNvidiaGpuName(name: string): boolean {
  // Empty names are never plausible.
  if (!name) {
    return false;
  }
  // The denylist is consulted before the positive pattern.
  if (isDenylistedNvidiaGpuName(name)) {
    return false;
  }
  return NVIDIA_GPU_NAME_PATTERN.test(name);
}
Expand Down
93 changes: 93 additions & 0 deletions src/lib/inference/nim.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,99 @@ describe("nim", () => {
}
});

// #4565 accept-path: a real Windows-ARM N1X + WSL2 + Docker Desktop host
// reports the same `JMJWOA-Generic-*` placeholder as the Snapdragon shim, yet
// it can pass a bounded Docker `--gpus` CUDA proof. Once the injected prover
// confirms that proof, the denylisted name is accepted and the detection is
// tagged so the sandbox preflight reaches the Docker Desktop WSL branch.
it("accepts a denylisted ARM64 GPU when the bounded Docker GPU proof passes (#4565)", () => {
  // Fake runner: only the nvidia-smi name/memory query yields output.
  const runCapture = vi.fn((cmd: string | string[]) => {
    if (!Array.isArray(cmd)) throw new Error("expected argv array");
    const isGpuQuery =
      cmd[0] === "nvidia-smi" && cmd.some((arg: string) => arg.includes("name,memory.total"));
    return isGpuQuery ? "JMJWOA-Generic-GPU, 65471, 65000\n" : "";
  });
  const { nimModule, restore } = loadNimWithMockedRunner(runCapture);

  // Prover stub that reports a clean, passing CUDA proof.
  const passingProof = {
    passed: true,
    timedOut: false,
    exitCode: 0,
    diagnostic: "",
  };
  const proveArm64WslDockerDesktopGpu = vi.fn(() => passingProof);

  try {
    withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
      const detection = nimModule.detectGpu({ proveArm64WslDockerDesktopGpu });

      expect(detection).toMatchObject({
        type: "nvidia",
        name: "JMJWOA-Generic-GPU",
        count: 1,
        totalMemoryMB: 65471,
        wslDockerDesktopGpuProofPassed: true,
      });
      // The prover must have been consulted with the reported placeholder name.
      expect(proveArm64WslDockerDesktopGpu).toHaveBeenCalledWith(["JMJWOA-Generic-GPU"]);
    });
  } finally {
    restore();
  }
});

// Snapdragon WoA fail-closed: the same placeholder name, but the bounded
// CUDA proof fails because there is no usable NVIDIA device. The detection
// must stay null so #3988/#4424 is not reopened.
//
// Each rejection is paired with an assertion that the injected prover was
// actually consulted. Without that, the test would also pass if detectGpu
// returned null before ever reaching the prover, and so would not pin the
// "proof failed => rejected" behavior it is named for.
it("keeps rejecting a denylisted ARM64 GPU when the Docker GPU proof fails (#4565/#3988)", () => {
  const runCapture = vi.fn((cmd: string | string[]) => {
    if (!Array.isArray(cmd)) throw new Error("expected argv array");
    if (cmd[0] === "nvidia-smi" && cmd.some((a: string) => a.includes("name,memory.total"))) {
      return "JMJWOA-Generic-GPU, 65471, 65000\n";
    }
    return "";
  });
  const { nimModule, restore } = loadNimWithMockedRunner(runCapture);
  // Prover stub for a host whose CUDA workload ran and failed.
  const failingProver = vi.fn(() => ({
    passed: false,
    timedOut: false,
    exitCode: 1,
    diagnostic: "no CUDA-capable device is detected",
  }));
  // Prover stub for a host that is not a proof candidate at all.
  const notCandidateProver = vi.fn(() => null);

  try {
    withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
      expect(nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: failingProver })).toBeNull();
      // The rejection must come from the failed proof, not from an earlier gate.
      expect(failingProver).toHaveBeenCalledWith(["JMJWOA-Generic-GPU"]);

      // A host that is not an ARM64 WSL Docker Desktop candidate returns
      // null from the prover and must also fail closed (no proof attempted).
      expect(
        nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: notCandidateProver }),
      ).toBeNull();
      expect(notCandidateProver).toHaveBeenCalledWith(["JMJWOA-Generic-GPU"]);
    });
  } finally {
    restore();
  }
});

// With no prover wired in (the dep is explicitly null), the denylist keeps
// failing closed exactly as it did before the #4565 accept-path existed.
it("rejects a denylisted ARM64 GPU when no Docker GPU prover is provided", () => {
  // Fake runner: only the nvidia-smi name/memory query yields output.
  const runCapture = vi.fn((cmd: string | string[]) => {
    if (!Array.isArray(cmd)) throw new Error("expected argv array");
    const isGpuQuery =
      cmd[0] === "nvidia-smi" && cmd.some((arg: string) => arg.includes("name,memory.total"));
    return isGpuQuery ? "JMJWOA-Generic-GPU, 65471, 65000\n" : "";
  });
  const { nimModule, restore } = loadNimWithMockedRunner(runCapture);

  try {
    withFirmwareModel("Microsoft Corporation Virtual Machine", () => {
      const detection = nimModule.detectGpu({ proveArm64WslDockerDesktopGpu: null });
      expect(detection).toBeNull();
    });
  } finally {
    restore();
  }
});

// Trust-tier gate: on ARM64 Linux with generic firmware, the absence of
// `/proc/driver/nvidia/` is the Windows-on-ARM WSL shim profile and must
// be rejected even when the nvidia-smi probe returns a plausible-looking
Expand Down
Loading
Loading