diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts
index 2985e19daf..e35e44c000 100644
--- a/src/lib/onboard.ts
+++ b/src/lib/onboard.ts
@@ -141,6 +141,7 @@ const { detectVllmProfile, installVllm } = require("./inference/vllm");
 const inferenceConfig: typeof import("./inference/config") = require("./inference/config");
 const {
   DEFAULT_CLOUD_MODEL,
+  DEFAULT_ROUTE_CREDENTIAL_ENV,
   INFERENCE_ROUTE_URL,
   MANAGED_PROVIDER_ID,
   getProviderSelectionConfig,
@@ -1869,6 +1870,74 @@ function providerExistsInGateway(name: string) {
   return onboardProviders.providerExistsInGateway(name, runOpenshell);
 }
 
+/**
+ * Map a persisted provider name to its REMOTE_PROVIDER_CONFIG entry; null when empty or unmatched.
+ */
+function getRemoteProviderConfigForName(
+  provider: string | null | undefined,
+): RemoteProviderConfigEntry | null {
+  if (!provider) return null;
+  if (provider === "nvidia-nim") return REMOTE_PROVIDER_CONFIG.build; // "nvidia-nim" always resolves to the build entry
+  return (
+    Object.values(REMOTE_PROVIDER_CONFIG).find((entry) => entry.providerName === provider) || null
+  );
+}
+
+/**
+ * Pick the credential env for a resumed provider: explicit value, then its config, then the route default.
+ */
+function getResumeProviderCredentialEnv(
+  provider: string,
+  config: RemoteProviderConfigEntry | null,
+  credentialEnv: string | null | undefined,
+): string {
+  if (credentialEnv) return credentialEnv;
+  if (config?.credentialEnv) return config.credentialEnv;
+  return isRoutedInferenceProvider(provider) ? DEFAULT_ROUTE_CREDENTIAL_ENV : ""; // "" when no source applies
+}
+
+/**
+ * Resume check: when the provider is gone from the gateway, obtain its API key and request inference setup.
+ */
+async function ensureResumeProviderReady(
+  provider: string | null | undefined,
+  credentialEnv: string | null | undefined,
+): Promise<{ forceInferenceSetup: boolean }> {
+  const config = getRemoteProviderConfigForName(provider);
+  if (!provider || (!config && !isRoutedInferenceProvider(provider))) {
+    return { forceInferenceSetup: false }; // no provider, or neither a configured remote nor a routed one
+  }
+  if (providerExistsInGateway(provider)) return { forceInferenceSetup: false }; // still present: nothing to do
+
+  const resolvedCredentialEnv = getResumeProviderCredentialEnv(provider, config, credentialEnv);
+  const credentialValue = hydrateCredentialEnv(resolvedCredentialEnv);
+  const providerLabel = config?.label || getProviderLabel(provider) || provider;
+  const helpUrl = config?.helpUrl || null;
+  if (!credentialValue) {
+    if (isNonInteractive()) {
+      console.error(
+        ` ${resolvedCredentialEnv} is required to recreate provider '${provider}' during resume.`,
+      );
+      console.error(
+        ` Re-run without --non-interactive to enter it, or set ${resolvedCredentialEnv} and retry.`,
+      );
+      process.exit(1); // non-interactive runs report the missing credential and stop instead of prompting
+    }
+    console.log("");
+    console.log(` [resume] Provider '${provider}' is missing from the gateway.`);
+    console.log(" Re-enter the API key so onboarding can recreate it before rebuilding.");
+    await replaceNamedCredential(
+      resolvedCredentialEnv,
+      `${providerLabel} API key`,
+      helpUrl,
+      (value) => validateNvidiaApiKeyValue(value, resolvedCredentialEnv),
+    );
+  } else {
+    note(` [resume] Provider '${provider}' is missing from the gateway; recreating it.`);
+  }
+  return { forceInferenceSetup: true }; // this function only secures the credential; the caller re-runs setup
+}
+
 function getMessagingChannelForEnvKey(envKey: string): string | null {
   if (envKey === "DISCORD_BOT_TOKEN") return "discord";
   if (envKey === "SLACK_BOT_TOKEN") return "slack";
@@ -9957,6 +10026,7 @@ async function onboard(opts: OnboardOptions = {}): Promise {
   let nimContainer = session?.nimContainer || null;
   let webSearchConfig = session?.webSearchConfig || null;
   let forceProviderSelection = false;
+  let forceInferenceSetup = false;
   while (true) {
     const resumeProviderSelection =
       !forceProviderSelection &&
@@ -9965,9 +10035,12 @@ async function onboard(opts: OnboardOptions = {}): Promise {
       typeof provider === "string" &&
       typeof model === "string";
     if (resumeProviderSelection) {
+      const resumeProvider = await ensureResumeProviderReady(provider, credentialEnv);
+      forceInferenceSetup = resumeProvider.forceInferenceSetup;
       skippedStepMessage("provider_selection", `${provider} / ${model}`);
       hydrateCredentialEnv(credentialEnv);
     } else {
+      forceInferenceSetup = false;
       // #2753: do not persist sandboxName to onboard-session.json before
       // the sandbox actually exists in the gateway (Step 6 markStepComplete
       // below). A SIGINT between any earlier step and createSandbox would
@@ -10000,7 +10073,10 @@ async function onboard(opts: OnboardOptions = {}): Promise {
     }
     process.env.NEMOCLAW_OPENSHELL_BIN = getOpenshellBinary();
     const resumeInference =
-      !forceProviderSelection && resume && isInferenceRouteReady(provider, model);
+      !forceProviderSelection &&
+      !forceInferenceSetup && // true when the resumed provider was missing from the gateway
+      resume &&
+      isInferenceRouteReady(provider, model);
     if (resumeInference) {
       if (isRoutedInferenceProvider(provider)) {
         try {
@@ -10482,6 +10558,7 @@ module.exports = {
   printSandboxCreateRecoveryHints,
   promptYesNoOrDefault,
   providerExistsInGateway,
+  ensureResumeProviderReady,
   parsePolicyPresetEnv,
   parseSandboxStatus,
   pruneStaleSandboxEntry,
diff --git a/test/onboard.test.ts b/test/onboard.test.ts
index 6042f3828a..8f32365ca9 100644
--- a/test/onboard.test.ts
+++ b/test/onboard.test.ts
@@ -113,6 +113,10 @@ type OnboardTestInternals = {
     flavor: "openai" | "anthropic",
   ) => string;
   providerNameToOptionKey: (name?: string | null) => string | null;
+  ensureResumeProviderReady: (
+    provider?: string | null,
+    credentialEnv?: string | null,
+  ) => Promise<{ forceInferenceSetup: boolean }>;
   parsePolicyPresetEnv: (value: string | null) => string[];
   patchStagedDockerfile: ShimFn;
   pullAndResolveBaseImageDigest: () => { digest: string; ref: string } | null;
@@ -163,6 +167,7 @@ function isOnboardTestInternals(
     typeof value.formatSandboxBuildEstimateNote === "function" &&
     Object.prototype.hasOwnProperty.call(value, "providerNameToOptionKey") &&
     typeof value.providerNameToOptionKey === "function" &&
+    typeof value.ensureResumeProviderReady === "function" &&
     typeof value.shouldRunCompatibleEndpointSandboxSmoke === "function" &&
     typeof value.writeSandboxConfigSyncFile === "function"
   );
@@ -218,6 +223,7 @@ const {
   isLoopbackHostname,
   normalizeProviderBaseUrl,
   providerNameToOptionKey,
+  ensureResumeProviderReady,
   parsePolicyPresetEnv,
   patchStagedDockerfile,
   pullAndResolveBaseImageDigest,
@@ -411,6 +417,84 @@ describe("onboard helpers", () => {
     );
   });
 
+  it("re-prompts and forces inference setup when a resumed remote provider was reset", () => {
+    const repoRoot = path.join(import.meta.dirname, "..");
+    const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-resume-provider-reset-"));
+    const fakeBin = path.join(tmpDir, "bin");
+    const scriptPath = path.join(tmpDir, "resume-provider-reset.js");
+    const onboardPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "onboard.js"));
+    const credentialsPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "credentials", "store.js"));
+    const runnerPath = JSON.stringify(path.join(repoRoot, "dist", "lib", "runner.js"));
+    const openshellPath = path.join(fakeBin, "openshell");
+
+    try {
+      fs.mkdirSync(fakeBin, { recursive: true });
+      fs.writeFileSync(openshellPath, "#!/usr/bin/env bash\nexit 0\n", { mode: 0o755 }); // stub binary that always exits 0
+
+      const script = `
+const credentials = require(${credentialsPath});
+const runner = require(${runnerPath});
+const calls = [];
+const saved = [];
+
+runner.run = (command) => {
+  calls.push(Array.isArray(command) ? command.slice(1).join(" ") : String(command));
+  if (Array.isArray(command) && command.includes("provider") && command.includes("get")) {
+    return { status: 1, stdout: "", stderr: "provider not found" };
+  }
+  return { status: 0, stdout: "", stderr: "" };
+};
+credentials.resolveProviderCredential = () => null;
+credentials.prompt = async () => "fresh-compatible-key";
+credentials.saveCredential = (name, value) => saved.push({ name, value });
+
+process.env.NEMOCLAW_OPENSHELL_BIN = ${JSON.stringify(openshellPath)};
+delete process.env.COMPATIBLE_API_KEY;
+
+const { ensureResumeProviderReady } = require(${onboardPath});
+(async () => {
+  const result = await ensureResumeProviderReady("compatible-endpoint", "COMPATIBLE_API_KEY");
+  console.log(JSON.stringify({
+    result,
+    saved,
+    envValue: process.env.COMPATIBLE_API_KEY,
+    providerGet: calls.some((call) => call === "provider get compatible-endpoint"),
+  }));
+})().catch((error) => {
+  console.error(error);
+  process.exit(1);
+});
+`;
+      fs.writeFileSync(scriptPath, script);
+
+      const result = spawnSync(process.execPath, [scriptPath], {
+        cwd: repoRoot,
+        encoding: "utf-8",
+        env: {
+          ...process.env,
+          HOME: tmpDir, // the child process gets the temp dir as HOME
+          PATH: `${fakeBin}:${process.env.PATH || ""}`,
+        },
+      });
+
+      assert.equal(result.status, 0, result.stderr); // child stderr doubles as the failure message
+      const payload = parseStdoutJson<{
+        result: { forceInferenceSetup: boolean };
+        saved: Array<{ name: string; value: string }>;
+        envValue: string;
+        providerGet: boolean;
+      }>(result.stdout);
+      assert.equal(payload.providerGet, true);
+      assert.deepEqual(payload.result, { forceInferenceSetup: true });
+      assert.deepEqual(payload.saved, [
+        { name: "COMPATIBLE_API_KEY", value: "fresh-compatible-key" },
+      ]);
+      assert.equal(payload.envValue, "fresh-compatible-key");
+    } finally {
+      fs.rmSync(tmpDir, { recursive: true, force: true });
+    }
+  });
+
   it("uses explicit messaging selections for policy suggestions when provided", () => {
     const originalTelegramBotToken = process.env.TELEGRAM_BOT_TOKEN;
     const originalDiscordBotToken = process.env.DISCORD_BOT_TOKEN;