hashangit · cbrown350 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,11 @@
 # ---- Dependencies ----
 node_modules/
 ui/node_modules/
+# This is a pnpm project — pnpm-lock.yaml is the source of truth.
+# Ignore npm's lockfile so an accidental `npm install` can't reintroduce a
+# second, divergent dependency graph (see CONTRIBUTING: "use pnpm, not npm").
+package-lock.json
+ui/package-lock.json
 
 # ---- Build output ----
 dist/
@@ -44,6 +49,9 @@ Thumbs.db
 .cursor/
 .codeium/
 .aider*
+# CONTEXT.md is a local session-continuity aid (per CLAUDE.md), not part of
+# the repo — keep it local-only so it never lands in a PR.
+CONTEXT.md
 # speckit: keep specs/ + memory + templates; ignore only ephemeral state
 .specify/artifacts/
 .specify/cache/

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
@@ -61,7 +61,7 @@ openfusion/
 │   │   ├── schema.ts       # zod schemas: candidates, judge, settings
 │   │   ├── store.ts        # read/write config.json + secrets.enc
 │   │   ├── crypto.ts       # AES-256-GCM, machine-bound master.key
-│   │   └── completeness.ts # isConfigured(): ≥2 candidates, judge set, all keys present
+│   │   └── completeness.ts # isConfigured(): ≥2 candidates, judge set, key for each referenced provider that needs one (keyless providers exempt)
 │   ├── providers/
 │   │   └── pi-ai-bridge.ts # getModel() + complete() wrapper; injects apiKey per call
 │   ├── store/
@@ -103,7 +103,7 @@ Progress emitted via `extra.sendNotification({ method: "notifications/progress",
 - **`secrets.enc`** (AES-256-GCM encrypted): `{ providers: { openai: {apiKey}, anthropic: {apiKey}, ... } }` — **one key per provider**, shared across all candidate slots + judge that use it (e.g. one OPENAI key, not one per slot).
 - **`master.key`** — random 256-bit key generated on first run, `chmod 600`. Machine-bound; used to encrypt/decrypt `secrets.enc`. (Simpler + sufficient for a local single-user tool; avoids native keychain deps.)
 
-`isConfigured()` = `candidates.length ≥ 2 && judge set && every referenced provider has a key`. Minimum **2**, maximum **5** candidates (enforced in schema + UI).
+`isConfigured()` = `candidates.length ≥ 2 && judge set && every referenced provider that needs a key has one` (keyless providers — e.g. the local `rapid-mlx` server — are exempt; keyed providers like `ollama-cloud` are not). Minimum **2**, maximum **5** candidates (enforced in schema + UI).
 
 ## Provider Layer (`@earendil-works/pi-ai`)
 
@@ -132,8 +132,9 @@ All on `127.0.0.1` only (holds keys — never expose externally). No CORS (same-
 | GET / PUT | `/api/config` | Read/write `config.json` (model choices + settings) |
 | GET | `/api/secrets` | Masked key **presence** per provider (never the raw key) |
 | PUT | `/api/secrets` | Set a provider's key (encrypted before write) |
-| GET | `/api/providers` | pi-ai `getProviders()` |
-| GET | `/api/providers/:p/models` | pi-ai `getModels(p)` |
+| GET | `/api/providers` | All providers (pi-ai built-ins + custom providers) with metadata |
+| GET | `/api/providers/:p/models` | pi-ai `getModels(p)` for built-ins; live `/v1/models` discovery for discoverable custom providers (returns `{models, error?}` on failure) |
+| GET | `/api/providers/:p/discover` | Explicit model-discovery retry for local discoverable providers (502 `{error}` on failure) |
 | POST | `/api/test` | Tiny pi-ai ping to validate a provider+model+key before save |
 | GET | `/api/stats` | Aggregated dashboard data (KPIs + by-model/by-day) |
 | GET | `/api/activity` | Paginated activity log, expandable to sub-calls |

diff --git a/src/config/completeness.ts b/src/config/completeness.ts
@@ -1,9 +1,17 @@
 // The configuration gate (Constitution VI).
 // isConfigured() = >=2 ENABLED candidates (<=5 unless benchmarkMode) +
-// >=1 ENABLED judge + a key for every referenced provider.
+// >=1 ENABLED judge + a key for every referenced provider that needs one.
+//
+// Keyless providers (e.g. the local rapid-mlx server) are EXEMPT from the
+// "key for every referenced provider" clause: they run without auth, so
+// requiring a stored key would block a valid local-only setup. This does not
+// weaken the gate — keyed providers (e.g. ollama-cloud) are still required to
+// have a stored key, and the >=2 candidates / >=1 judge rules are untouched.
+// See tests/custom-providers.test.ts "completeness gate with keyless providers".
 import type { RawConfig } from "./schema.js";
 import { referencedProviders, loadSecrets } from "./secrets.js";
 import { paths } from "../util/paths.js";
+import { KEYLESS_PROVIDERS } from "../providers/custom-providers.js";
 
 export interface CompletenessReport {
   configured: boolean;
@@ -28,7 +36,9 @@ export function isConfigured(config: RawConfig, secretsPath = paths.secrets(), k
   const referenced = referencedProviders(config);
   if (referenced.length > 0) {
     const secrets = loadSecrets(secretsPath, keyPath);
-    const missing = referenced.filter((p) => !secrets.providers[p]?.apiKey);
+    // Keyless providers (e.g. rapid-mlx) don't need an API key stored in secrets.
+    const needsKey = referenced.filter((p) => !KEYLESS_PROVIDERS.has(p));
+    const missing = needsKey.filter((p) => !secrets.providers[p]?.apiKey);
     if (missing.length > 0) reasons.push(`missing API key for provider(s): ${missing.join(", ")}`);
   }
 

diff --git a/src/fusion/fusion.ts b/src/fusion/fusion.ts
@@ -5,7 +5,7 @@ import { randomUUID } from "node:crypto";
 import type { RawConfig } from "../config/schema.js";
 import { isConfigured } from "../config/completeness.js";
 import { getKey } from "../config/secrets.js";
-import { resolveModel, type AnyModel } from "../providers/pi-ai-bridge.js";
+import { resolveModel, effectiveApiKey, type AnyModel } from "../providers/pi-ai-bridge.js";
 import { runParallelFanout, runSequentialFanout } from "./fanout.js";
 import type { WorkerResult } from "./worker.js";
 import { fusionStatusRegistry } from "./status.js";
@@ -220,7 +220,7 @@ export async function runFusion(input: FusionInput): Promise<FusionResult> {
     model: safeResolve(c.provider, c.model),
     prompt: input.prompt,
     context: input.context,
-    apiKey: getKey(c.provider, secretsPath, keyPath) ?? "",
+    apiKey: effectiveApiKey(c.provider, getKey(c.provider, secretsPath, keyPath)),
     timeoutMs: candidateTimeoutMs,
     workerPrompt: personaPrompts.worker,
   }));
@@ -288,7 +288,7 @@ export async function runFusion(input: FusionInput): Promise<FusionResult> {
 
   // --- Judge step 1: analysis ---
   const judgeModel = safeResolve(judge.provider, judge.model);
-  const judgeApiKey = getKey(judge.provider, secretsPath, keyPath) ?? "";
+  const judgeApiKey = effectiveApiKey(judge.provider, getKey(judge.provider, secretsPath, keyPath));
   const candidateViews: CandidateView[] = survivors.map((w, i) => ({
     index: i + 1,
     provider: w.provider,

diff --git a/src/index.ts b/src/index.ts
@@ -7,8 +7,17 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
 import { createMcpServer } from "./server/mcp-server.js";
 import { startUiServer } from "./server/ui-server.js";
 import { printStartupBanner } from "./util/startup.js";
+import { registerConfigModels } from "./providers/pi-ai-bridge.js";
+import { loadConfig } from "./config/store.js";
 
 async function main(): Promise<void> {
+  // Register any custom provider models referenced by the saved config so
+  // resolveModel() works at fusion time without requiring a prior UI /models
+  // call. loadConfig() returns an empty config (no throw) when the file is
+  // absent (first run), so the call is deliberately left unguarded: a genuinely
+  // corrupt config.json fails loudly here rather than being silently swallowed.
+  registerConfigModels(loadConfig());
+
   // First-run banner (stderr) + auto-open the dashboard on a fresh install.
   await printStartupBanner();
 

diff --git a/src/providers/custom-providers.ts b/src/providers/custom-providers.ts
@@ -0,0 +1,187 @@
+// Custom provider definitions for OpenFusion.
+//
+// pi-ai's static registry covers the well-known cloud providers, but two
+// OpenAI-compatible endpoints aren't in it: rapid-mlx (a LOCAL MLX inference
+// server on Apple Silicon) and ollama-cloud (Ollama's hosted CLOUD API). This
+// module defines both so they appear in the web config dropdowns and resolve
+// correctly at fusion time. Despite the branch name ("local-providers"), this
+// feature intentionally covers BOTH a local server and a cloud provider.
+//
+// Both custom providers are discoverable — they expose a /v1/models endpoint
+// so the server can fetch the actual available models at runtime. No hardcoded
+// model lists: rapid-mlx's models depend on what's loaded locally, and
+// ollama-cloud's catalog changes as Ollama adds new cloud models.
+//
+// The `local` flag distinguishes local servers (may be unreachable, so the UI
+// shows a free-text input + a Discover button to retry) from cloud providers
+// (assumed reachable, so the UI shows a normal dropdown).
+//
+// KNOWN LIMITATION: buildModelDescriptor() bakes in default contextWindow
+// (131072) and maxTokens (8192) for every discovered/typed model, because the
+// OpenAI /v1/models response doesn't carry those fields. Cost is reported as 0
+// for the same reason. If a model's real limits differ, the dashboard's per-model
+// context badge may be inaccurate; this does not affect fusion correctness.
+//
+// At runtime, registerConfigModels() (called at startup + after each config
+// save) registers descriptors for models referenced in the saved config so
+// resolveModel() works. For discovered or user-typed models,
+// registerCustomModel() is called on the fly.
+import type { AnyModel } from "./pi-ai-bridge.js";
+
+/** A custom provider definition. */
+export interface CustomProviderDefinition {
+  /** Unique provider id (used in config.json and secrets). */
+  id: string;
+  /** Human-readable name for the UI. */
+  name: string;
+  /** Short description shown in the UI. */
+  description: string;
+  /** Whether this provider requires an API key. Local servers typically don't. */
+  apiKeyRequired: boolean;
+  /** Base URL for the OpenAI-compatible API endpoint. */
+  baseUrl: string;
+  /** pi-ai API type. All custom providers currently use openai-completions. */
+  api: "openai-completions" | "openai-responses";
+  /**
+   * Whether this provider supports /v1/models discovery.
+   * When true, the /models API endpoint will query the provider's /v1/models
+   * for a live model list and return it as a normal dropdown.
+   */
+  discoverable: boolean;
+  /**
+   * Whether this is a local provider that may be unreachable.
+   * When true + discoverable, the UI shows a free-text input for model IDs
+   * if the server is down (no models found), plus a Discover button to retry.
+   * Cloud providers (local=false) always show a normal dropdown.
+   */
+  local: boolean;
+  /** Compat overrides for the OpenAI completions API (auto-detected if not set). */
+  compat?: Record<string, unknown>;
+}
+
+// ─── Provider definitions ────────────────────────────────────────────────────
+
+/** rapid-mlx: local MLX inference server for Apple Silicon. No API key needed. */
+export const RAPID_MLX: CustomProviderDefinition = {
+  id: "rapid-mlx",
+  name: "Rapid-MLX (Local)",
+  description:
+    "Local MLX inference server for Apple Silicon. Runs on localhost — no API key needed. " +
+    "Click Discover to load available models, or type a model ID directly.",
+  apiKeyRequired: false,
+  baseUrl: "http://localhost:8000/v1",
+  api: "openai-completions",
+  discoverable: true,
+  local: true,
+  compat: {
+    supportsStore: false,
+    supportsDeveloperRole: false,
+    supportsReasoningEffort: false,
+    maxTokensField: "max_tokens",
+    supportsStrictMode: false,
+    supportsLongCacheRetention: false,
+  },
+};
+
+/** ollama-cloud: Ollama's cloud API at ollama.com. Requires an API key. */
+export const OLLAMA_CLOUD: CustomProviderDefinition = {
+  id: "ollama-cloud",
+  name: "Ollama Cloud",
+  description:
+    "Ollama's hosted cloud API at ollama.com. Requires an API key. " +
+    "Models are fetched from the cloud catalog automatically.",
+  apiKeyRequired: true,
+  baseUrl: "https://ollama.com/v1",
+  api: "openai-completions",
+  discoverable: true,
+  local: false,
+  compat: {
+    supportsStore: false,
+    supportsDeveloperRole: false,
+    supportsReasoningEffort: false,
+    maxTokensField: "max_tokens",
+    supportsStrictMode: false,
+    supportsLongCacheRetention: false,
+  },
+};
+
+/** All custom provider definitions, keyed by provider id. */
+export const CUSTOM_PROVIDERS: Record<string, CustomProviderDefinition> = {
+  [RAPID_MLX.id]: RAPID_MLX,
+  [OLLAMA_CLOUD.id]: OLLAMA_CLOUD,
+};
+
+/** Provider ids that don't require an API key. */
+export const KEYLESS_PROVIDERS = new Set(
+  Object.values(CUSTOM_PROVIDERS)
+    .filter((p) => !p.apiKeyRequired)
+    .map((p) => p.id),
+);
+
+/**
+ * Build a model descriptor for a dynamically discovered or user-typed model.
+ * Used by registerCustomModel() and the discover endpoint.
+ */
+export function buildModelDescriptor(
+  provider: CustomProviderDefinition,
+  modelId: string,
+  overrides?: { contextWindow?: number; maxTokens?: number; reasoning?: boolean },
+): AnyModel {
+  return {
+    id: modelId,
+    name: modelId,
+    api: provider.api,
+    provider: provider.id,
+    baseUrl: provider.baseUrl,
+    reasoning: overrides?.reasoning ?? false,
+    input: ["text" as const],
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    contextWindow: overrides?.contextWindow ?? 131072,
+    maxTokens: overrides?.maxTokens ?? 8192,
+    ...(provider.compat ? { compat: provider.compat } : {}),
+  };
+}
+
+/** Response shape from the OpenAI-compatible /v1/models endpoint. */
+export interface DiscoveryModel {
+  id: string;
+  object?: string;
+  created?: number;
+  owned_by?: string;
+}
+
+export interface DiscoveryResponse {
+  object?: string;
+  data: DiscoveryModel[];
+}
+
+/**
+ * Discover models from a provider's /v1/models endpoint (10 s timeout).
+ * Returns the model IDs sorted; throws on network failure or any non-2xx status.
+ */
+export async function discoverModels(
+  provider: CustomProviderDefinition,
+  apiKey?: string,
+): Promise<string[]> {
+  const url = `${provider.baseUrl}/models`;
+  const headers: Record<string, string> = {
+    Accept: "application/json",
+  };
+  if (apiKey) {
+    headers.Authorization = `Bearer ${apiKey}`;
+  }
+  const resp = await fetch(url, { headers, signal: AbortSignal.timeout(10_000) });
+  if (!resp.ok) {
+    const body = await resp.text().catch(() => "");
+    throw new Error(`${resp.status} ${resp.statusText}${body ? `: ${body.slice(0, 200)}` : ""}`);
+  }
+  // Tolerate non-compliant /v1/models responses: a null body, a missing data
+  // array, or non-object elements would otherwise crash discovery. Keep only
+  // entries that look like { id: string }.
+  const json = (await resp.json()) as DiscoveryResponse | null;
+  const models = json && Array.isArray(json.data) ? json.data : [];
+  return models
+    .filter((m): m is DiscoveryModel => m != null && typeof m === "object" && typeof m.id === "string")
+    .map((m) => m.id)
+    .sort();
+}