diff --git a/middleware/packages/harness-orchestrator/src/buildOrchestrator.ts b/middleware/packages/harness-orchestrator/src/buildOrchestrator.ts index 4c202c2e..37a26c1c 100644 --- a/middleware/packages/harness-orchestrator/src/buildOrchestrator.ts +++ b/middleware/packages/harness-orchestrator/src/buildOrchestrator.ts @@ -147,6 +147,16 @@ export interface BuiltOrchestrator { readonly orchestrator: Orchestrator; /** The `chatAgent` bundle — verifier-wrapped agent + raw orchestrator. */ readonly bundle: ChatAgentBundle; + /** + * The resolved orchestrator model after the per-Agent overlay was applied — + * i.e. the actual id the turn loop will send to the provider for this Agent. + * Mirrors `config.model`; carried out so callers (registry log, /admin UI) + * can read it without re-computing the overlay. Issue #296 acceptance #4. + */ + readonly effectiveModel: string; + /** Same as `config.modelRouting` — surfaced so callers can describe the + * per-turn routing the Agent is on without inspecting the orchestrator. */ + readonly effectiveModelRouting?: ModelRoutingConfig; } /** @@ -331,6 +341,8 @@ export function buildOrchestratorForAgent( sessionLogger, chatSessionStore, }, + effectiveModel: config.model, + ...(config.modelRouting ? { effectiveModelRouting: config.modelRouting } : {}), }; } @@ -356,5 +368,7 @@ export function buildOrchestratorForAgent( return { orchestrator, bundle: { agent, raw: orchestrator, sessionLogger, chatSessionStore }, + effectiveModel: config.model, + ...(config.modelRouting ? { effectiveModelRouting: config.modelRouting } : {}), }; } diff --git a/middleware/packages/harness-orchestrator/src/index.ts b/middleware/packages/harness-orchestrator/src/index.ts index ffcf62c0..796a9441 100644 --- a/middleware/packages/harness-orchestrator/src/index.ts +++ b/middleware/packages/harness-orchestrator/src/index.ts @@ -69,6 +69,8 @@ export type { ScopedMemoryStoreOptions } from './registry/scopedMemoryStore.js'; export { ConfigStore, ConfigValidationError, + validateModelRef, + validateModelRoutingShape, } from './registry/configStore.js'; export type { AgentInput, @@ -123,7 +125,10 @@ export type { SubAgentGraph, SubAgentToolDeps, } from './registry/subAgentTools.js'; -export { resolveAgentModelRouting } from './registry/agentRuntime.js'; +export { + DEFAULT_ORCHESTRATOR_MODEL, + resolveAgentModelRouting, +} from './registry/agentRuntime.js'; export type { ResolvedAgentRuntime } from './registry/agentRuntime.js'; // Per-Agent Orchestrator factory (US3) — re-exported so US4-style external diff --git a/middleware/packages/harness-orchestrator/src/plugin.ts b/middleware/packages/harness-orchestrator/src/plugin.ts index d1635d4e..161f7a7f 100644 --- a/middleware/packages/harness-orchestrator/src/plugin.ts +++ b/middleware/packages/harness-orchestrator/src/plugin.ts @@ -54,6 +54,7 @@ import type { TurnHookRunner } from './turnHooks.js'; import type { ChatSessionStore } from './chatSessionStore.js'; import type { NativeToolRegistry } from './nativeToolRegistry.js'; import type { Orchestrator } from './orchestrator.js'; +import { DEFAULT_ORCHESTRATOR_MODEL } from './registry/agentRuntime.js'; import { ConfigStore } from './registry/configStore.js'; import { OrchestratorRegistry, @@ -139,11 +140,11 @@ const GRAPH_POOL_SERVICE = 'graphPool'; const PLUGIN_CAPABILITIES_SERVICE = 'pluginCapabilities'; // Fallback when the operator has not set `orchestrator_model` in the install -// config. Must be a currently-served Anthropic model id — a stale/typo'd id -// makes every turn fail with `404 not_found_error` (the orchestrator main -// loop has no other model source). Kept in sync with the kernel default -// `ORCHESTRATOR_MODEL` in middleware/src/config.ts. -const DEFAULT_MODEL = 'claude-opus-4-8'; +// config. Shared with the registry's per-instance resolution (tier 3) so the +// install-config default and the per-Agent overlay fall back to the same id. +// Kept in sync with the kernel default `ORCHESTRATOR_MODEL` in +// middleware/src/config.ts. +const DEFAULT_MODEL = DEFAULT_ORCHESTRATOR_MODEL; // 8192, not 4096: a verbose preamble + a large structured tool call (e.g. a // multi-sheet create_xlsx with formulas) truncates at 4096 → `max_tokens` // mid-tool-call, so the file is never built. Also enforced as a floor below so diff --git a/middleware/packages/harness-orchestrator/src/registry/agentGraphStore.ts b/middleware/packages/harness-orchestrator/src/registry/agentGraphStore.ts index 0abb23af..a2837226 100644 --- a/middleware/packages/harness-orchestrator/src/registry/agentGraphStore.ts +++ b/middleware/packages/harness-orchestrator/src/registry/agentGraphStore.ts @@ -1,6 +1,6 @@ import type { Pool } from 'pg'; -import { ConfigValidationError } from './configStore.js'; +import { ConfigValidationError, validateModelRef } from './configStore.js'; /** * Agent Builder graph store (P0). @@ -315,7 +315,17 @@ export class AgentGraphStore { return rows.map(mapSubAgent); } - async createSubAgent(input: SubAgentInput): Promise { + async createSubAgent( + input: SubAgentInput, + activeProvider?: string, + ): Promise { + // Issue #296 follow-up — guard sub-agent model writes against unknown + // ids the same way `setModelRouting` guards the orchestrator model. + // Empty / null skips validation (= inherit parent agent / platform). + // `activeProvider` additionally rejects a cross-provider pick. + if (input.model != null && input.model.trim() !== '') { + validateModelRef('subAgent.model', input.model.trim(), activeProvider); + } try { const { rows } = await this.pool.query( `INSERT INTO agent_subagents @@ -327,7 +337,10 @@ export class AgentGraphStore { input.parentAgentId, input.name, input.skillId ?? null, - input.model ?? null, + // Normalise the empty-string "(default)" dropdown choice to NULL so + // the column stays clean (`resolveSubAgentModel('')` would inherit + // anyway, but `''` is dirty data). + input.model?.trim() || null, input.maxTokens ?? null, input.maxIterations ?? null, input.systemPromptOverride ?? null, @@ -346,12 +359,29 @@ export class AgentGraphStore { } } - async updateSubAgent(id: string, patch: SubAgentPatch): Promise { + async updateSubAgent( + id: string, + patch: SubAgentPatch, + activeProvider?: string, + ): Promise { + // Issue #296 follow-up — see `createSubAgent` rationale. + // Non-empty string → validate + pin the model. + // `null` / empty → CLEAR back to inherit-parent (skip validation). + // Absent (undefined) → keep the existing value untouched. + if (patch.model != null && patch.model.trim() !== '') { + validateModelRef('subAgent.model', patch.model.trim(), activeProvider); + } + // `model` cannot use COALESCE: COALESCE(NULL, model) keeps the old value, + // so a `null` clear would be a silent no-op. Guard the write with an + // explicit "was model in the patch?" flag ($10) so `undefined` keeps and + // `null`/'' clears to NULL. + const modelProvided = patch.model !== undefined; + const modelValue = patch.model?.trim() || null; const { rows } = await this.pool.query( `UPDATE agent_subagents SET name = COALESCE($2, name), skill_id = COALESCE($3, skill_id), - model = COALESCE($4, model), + model = CASE WHEN $10 THEN $4 ELSE model END, max_tokens = COALESCE($5, max_tokens), max_iterations = COALESCE($6, max_iterations), system_prompt_override = COALESCE($7, system_prompt_override), @@ -364,12 +394,13 @@ export class AgentGraphStore { id, patch.name ?? null, patch.skillId ?? null, - patch.model ?? null, + modelValue, patch.maxTokens ?? null, patch.maxIterations ?? null, patch.systemPromptOverride ?? null, patch.status ?? null, patch.position ? JSON.stringify(patch.position) : null, + modelProvided, ], ); const row = rows[0]; diff --git a/middleware/packages/harness-orchestrator/src/registry/agentRuntime.ts b/middleware/packages/harness-orchestrator/src/registry/agentRuntime.ts index bfdbfc37..837f7da6 100644 --- a/middleware/packages/harness-orchestrator/src/registry/agentRuntime.ts +++ b/middleware/packages/harness-orchestrator/src/registry/agentRuntime.ts @@ -1,3 +1,5 @@ +import { resolveModelRef } from '@omadia/llm-provider'; + import type { ModelRoutingConfig as RuntimeModelRouting } from '../modelRouter.js'; /** @@ -15,7 +17,65 @@ import type { ModelRoutingConfig as RuntimeModelRouting } from '../modelRouter.j * unit-testable without a DB. */ -const DEFAULT_CLASSIFIER_MODEL = 'claude-haiku-4-5'; +// Must be an id the registry actually serves: `validateModelRef` rejects an +// unregistered ref, so the code's own default must agree with its write- +// validation. The registered Haiku is the dated id (alias `haiku`); the +// undated `claude-haiku-4-5` is NOT in the registry (issue #296 nit). +const DEFAULT_CLASSIFIER_MODEL = 'claude-haiku-4-5-20251001'; + +/** + * Hard fallback orchestrator model — the last tier of the per-instance model + * resolution (issue #296 AC#2): + * + * 1. the Agent's own `model_routing.main` (operator's per-instance choice) + * 2. the global seeded platform default (`orchestrator_model` install config, + * itself seeded from the `ORCHESTRATOR_MODEL` env in middleware/src/config.ts) + * 3. this constant — so an empty / misconfigured platform default never yields + * an empty model id (which would 404 on every turn). + * + * Must be a currently-served model id, kept in sync with `ORCHESTRATOR_MODEL` + * (middleware/src/config.ts) and the plugin's install-config fallback. + */ +export const DEFAULT_ORCHESTRATOR_MODEL = 'claude-opus-4-8'; + +/** + * Resolve a model ref to the active provider's concrete bare `modelId` + * (issue #296). + * + * Both the orchestrator main loop AND in-process sub-agents send `model` RAW to + * a single concrete provider adapter — there is no ref→modelId resolution in + * the send path. The Admin picker stores a provider-qualified id + * (`anthropic:claude-opus-4-8`) or a legacy alias (`opus`); sending either raw + * 404s every turn. Returns: + * - registry-known, same provider → the bare vendor `modelId` + * - registry-known, DIFFERENT provider than `activeProvider` → `undefined` + * (cross-provider is out of scope and would 404 on the wrong adapter — the + * caller falls back to its own default) + * - registry-UNKNOWN (not in the curated set) → the raw trimmed ref. The + * registry is a curated subset, not the universe of valid API ids — an id + * the registry does not list may still be served (e.g. an undated default + * or an operator-typed id). Passing it through preserves pre-resolution + * behaviour, matching the `resolveModelRef(x)?.modelId ?? x` contract used + * elsewhere (e.g. `builderPreviewPrompt`). + * - empty / whitespace → `undefined` (no model specified → caller falls back) + * + * The CLI provider owns its own alias scheme (`sonnet`/`opus`) and must be + * handled by the caller BEFORE this — pass its refs through untouched. + */ +export function resolveModelIdForProvider( + ref: string | null | undefined, + activeProvider: string | undefined, +): string | undefined { + const trimmed = ref?.trim(); + if (!trimmed) return undefined; + const info = resolveModelRef( + trimmed, + activeProvider ? { defaultProvider: activeProvider } : {}, + ); + if (info === undefined) return trimmed; + if (activeProvider && info.provider !== activeProvider) return undefined; + return info.modelId; +} export interface ResolvedAgentRuntime { /** Primary model override (the agent's `main`), if set. */ diff --git a/middleware/packages/harness-orchestrator/src/registry/applyDiff.ts b/middleware/packages/harness-orchestrator/src/registry/applyDiff.ts index 1ecb341f..a83608d0 100644 --- a/middleware/packages/harness-orchestrator/src/registry/applyDiff.ts +++ b/middleware/packages/harness-orchestrator/src/registry/applyDiff.ts @@ -10,7 +10,11 @@ import type { SubAgentRow, ToolGrantRow, } from './agentGraphStore.js'; -import { resolveAgentModelRouting } from './agentRuntime.js'; +import { + DEFAULT_ORCHESTRATOR_MODEL, + resolveAgentModelRouting, + resolveModelIdForProvider, +} from './agentRuntime.js'; import type { AgentPluginRow, AgentRow, @@ -160,18 +164,69 @@ export function buildForAgent( // platform default: `main` overrides the model, `triage` mode adds per-turn // Haiku→Sonnet/Opus routing. Falls back to the platform runtime when unset. const routing = resolveAgentModelRouting(agent.modelRouting); + + // The orchestrator hands `model` to a SINGLE concrete provider adapter, which + // sends it RAW to the wire API (no ref→modelId resolution in the send path). + // The Admin picker stores a provider-qualified id / alias, so resolve the + // per-Agent overlay to the active provider's concrete `modelId` HERE — see + // `resolveModelIdForProvider` (issue #296). + // + // The CLI provider is resolved like any other — its models ARE registered + // (`claude-cli:opus-cli` etc.), so `resolveModelIdForProvider('claude-cli:opus-cli', + // 'claude-cli')` → `opus-cli` → (`-cli` stripped downstream) → `opus`, the + // alias the CLI expects. The old code special-cased CLI to pass the ref RAW, + // which left the picker's `claude-cli:opus-cli` unresolved → `claude-cli:opus` + // after the strip → an invalid `--model` on every turn (issue #296 BLOCKER). + // + // A ref the resolver returns `undefined` for (a registry-known CROSS-provider + // id, or a legacy bare CLI alias like `opus`) falls through to the raw trimmed + // ref. Cross-provider picks are rejected at WRITE time (the model-routing / + // sub-agent validators are scoped to the active provider, issue #296 MAJOR), + // so a cross-provider ref never reaches here for a fresh write; the raw + // fallthrough is what lets a CLI deployment's bare alias (`opus`) run. A + // registry-UNKNOWN same-context ref is already returned raw by the resolver. + // The platform default (`runtime.model`, operator-set env) is not passed + // through here — it works raw today and resolving it would change established + // behaviour. + const activeProvider = deps.provider?.id; + const resolveOverlay = (ref: string | undefined): string | undefined => + (resolveModelIdForProvider(ref, activeProvider) ?? ref?.trim()) || undefined; + + // Per-instance model resolution (issue #296 AC#2), three tiers: + // 1. the Agent's `model_routing.main` (operator's per-Agent choice) + // 2. the global seeded platform default `runtime.model` + // (= `orchestrator_model` install config = `ORCHESTRATOR_MODEL` env) + // 3. `DEFAULT_ORCHESTRATOR_MODEL` — guards against an empty / whitespace + // platform default so the turn loop never gets an empty model id. + const model = + resolveOverlay(routing.model) || + runtime.model?.trim() || + DEFAULT_ORCHESTRATOR_MODEL; + + // Resolve the per-turn routing sub-models the same way. Any sub-model that + // does not resolve to the active provider falls back to the resolved `model` + // so every id the turn loop sends is a valid same-provider `modelId`. + const overlayRouting = routing.modelRouting + ? { + classifierModel: + resolveOverlay(routing.modelRouting.classifierModel) ?? model, + simpleModel: resolveOverlay(routing.modelRouting.simpleModel) ?? model, + complexModel: resolveOverlay(routing.modelRouting.complexModel) ?? model, + } + : undefined; + return buildOrchestratorForAgent( { agentId: agent.slug, - model: routing.model ?? runtime.model, + model, maxTokens: runtime.maxTokens, maxToolIterations: runtime.maxToolIterations, // Per-turn model routing: prefer the agent's own persisted routing // (Agent Builder P5); otherwise fall back to the platform default // `runtime.modelRouting` so registry-managed orchestrators still emit // `turn_routing` and the UI renders the Haiku-triage badge (origin/main). - ...((routing.modelRouting ?? runtime.modelRouting) - ? { modelRouting: routing.modelRouting ?? runtime.modelRouting } + ...((overlayRouting ?? runtime.modelRouting) + ? { modelRouting: overlayRouting ?? runtime.modelRouting } : {}), ...(runtime.loopRepeatSoft !== undefined ? { loopRepeatSoft: runtime.loopRepeatSoft } diff --git a/middleware/packages/harness-orchestrator/src/registry/configStore.ts b/middleware/packages/harness-orchestrator/src/registry/configStore.ts index cb8063b7..2d8788ee 100644 --- a/middleware/packages/harness-orchestrator/src/registry/configStore.ts +++ b/middleware/packages/harness-orchestrator/src/registry/configStore.ts @@ -1,5 +1,7 @@ import type { Pool } from 'pg'; +import { resolveModelRef } from '@omadia/llm-provider'; + import { AgentGraphStore, type ScheduleRow, @@ -110,6 +112,90 @@ export class ConfigValidationError extends Error { const SLUG_RE = /^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$/; +/** + * Validate persisted `model_routing` JSON before write. Shape: + * { mode: 'single'|'triage', main: , triage?: , simple?: } + * Each `` must resolve via `resolveModelRef` (provider-qualified id, + * legacy alias, bare vendor id, or `class:*`). An empty `main` is illegal — + * use `null` to clear. Optional fields may be absent or empty-string (latter + * treated as absent so a UI dropdown's "(default)" choice round-trips cleanly). + */ +export function validateModelRoutingShape( + routing: Record, + activeProvider?: string, +): void { + const mode = routing['mode']; + if (mode !== 'single' && mode !== 'triage') { + throw new ConfigValidationError( + `modelRouting.mode must be 'single' or 'triage' (got ${JSON.stringify(mode)})`, + ); + } + const main = routing['main']; + if (typeof main !== 'string' || main.trim() === '') { + throw new ConfigValidationError( + `modelRouting.main is required (clear routing by passing null instead)`, + ); + } + validateModelRef(`modelRouting.main`, main.trim(), activeProvider); + if (mode === 'triage') { + for (const key of ['triage', 'simple'] as const) { + const raw = routing[key]; + if (raw === undefined || raw === null) continue; + if (typeof raw !== 'string') { + throw new ConfigValidationError( + `modelRouting.${key} must be a string (got ${typeof raw})`, + ); + } + const trimmed = raw.trim(); + if (trimmed === '') continue; + validateModelRef(`modelRouting.${key}`, trimmed, activeProvider); + } + } +} + +/** + * Throw `ConfigValidationError` when `ref` is non-empty and does not resolve + * to any model registered with `@omadia/llm-provider`. Used by every persisted + * model-id surface (orchestrator routing, sub-agent overrides) so the operator + * cannot pin runtime to an id the live provider set does not serve — an + * unknown id would 404 at every turn. Empty / whitespace `ref` is rejected — + * callers should skip the validator when clearing the field instead. + * + * When `activeProvider` is given (the orchestrator's single configured + * provider), the ref is resolved in that provider's context AND a ref that + * resolves to a DIFFERENT provider is rejected: cross-provider routing is out + * of scope (issue #296) and would be silently dropped to the platform default + * at build, so the picker must not be able to persist a model the Agent never + * actually runs on. Without it the check is provider-agnostic (legacy). + */ +export function validateModelRef( + field: string, + ref: string, + activeProvider?: string, +): void { + if (typeof ref !== 'string' || ref.trim() === '') { + throw new ConfigValidationError( + `${field} must be a non-empty model ref (clear with null instead)`, + ); + } + const info = resolveModelRef( + ref.trim(), + activeProvider ? { defaultProvider: activeProvider } : {}, + ); + if (info === undefined) { + throw new ConfigValidationError( + `${field} '${ref}' is not registered with any installed LLM provider`, + ); + } + if (activeProvider && info.provider !== activeProvider) { + throw new ConfigValidationError( + `${field} '${ref}' resolves to provider '${info.provider}', but the ` + + `orchestrator runs on '${activeProvider}' — cross-provider model ` + + `selection is not supported; pick a '${activeProvider}' model`, + ); + } +} + interface AgentDbRow { id: string; slug: string; @@ -283,11 +369,19 @@ export class ConfigStore { } /** Agent Builder — set (or clear, with null) the per-agent model routing. - * Direct write (not COALESCE) so the operator can disable routing. */ + * Direct write (not COALESCE) so the operator can disable routing. + * + * Validates `main`/`triage`/`simple` against `@omadia/llm-provider` so an + * operator (or a stale REST client) cannot pin an agent to a model id that + * no installed provider serves — that would crash every turn at runtime + * with `404 not_found_error`. `null` clears routing back to the platform + * default and skips validation. */ async setModelRouting( id: string, routing: Record | null, + activeProvider?: string, ): Promise { + if (routing) validateModelRoutingShape(routing, activeProvider); const { rows } = await this.pool.query( `UPDATE agents SET model_routing = $2::jsonb, updated_at = now() WHERE id = $1 RETURNING *`, diff --git a/middleware/packages/harness-orchestrator/src/registry/index.ts b/middleware/packages/harness-orchestrator/src/registry/index.ts index c55e5cda..25b4658c 100644 --- a/middleware/packages/harness-orchestrator/src/registry/index.ts +++ b/middleware/packages/harness-orchestrator/src/registry/index.ts @@ -333,6 +333,10 @@ export class OrchestratorRegistry { agentId: action.agent.id, plugins: plugins.filter((p) => p.enabled).map((p) => p.pluginId), memoryScope, + effectiveModel: built.effectiveModel, + ...(built.effectiveModelRouting + ? { effectiveModelRouting: built.effectiveModelRouting } + : {}), }); return; } @@ -376,6 +380,10 @@ export class OrchestratorRegistry { agentId: action.agent.id, reason: action.reason, replaced: !!before, + effectiveModel: built.effectiveModel, + ...(built.effectiveModelRouting + ? { effectiveModelRouting: built.effectiveModelRouting } + : {}), }); return; } diff --git a/middleware/packages/harness-orchestrator/src/registry/subAgentTools.ts b/middleware/packages/harness-orchestrator/src/registry/subAgentTools.ts index 68d8a134..ac3cd60e 100644 --- a/middleware/packages/harness-orchestrator/src/registry/subAgentTools.ts +++ b/middleware/packages/harness-orchestrator/src/registry/subAgentTools.ts @@ -3,6 +3,7 @@ import type { LocalSubAgentTool } from '@omadia/plugin-api'; import { createCliSubAgent } from '../cliSubAgent.js'; import { LocalSubAgent } from '../localSubAgent.js'; +import { resolveModelIdForProvider } from './agentRuntime.js'; import type { McpManager} from '../mcp/mcpClient.js'; import { @@ -97,15 +98,18 @@ export function buildSubAgentDomainTools( ? createCliSubAgent({ name: sub.name, systemPrompt, + // Resolve the picker-stored ref to the bare `modelId` BEFORE the + // `-cli` strip — exactly like the orchestrator main model (issue #296 + // BLOCKER). See `resolveCliSubAgentModel`. model: (deps.cliModelAlias ?? ((model) => model))( - sub.model ?? deps.defaultModel, + resolveCliSubAgentModel(sub.model, deps.defaultModel), ), tools: subTools, }) : new LocalSubAgent({ name: sub.name, provider: deps.provider, - model: sub.model ?? deps.defaultModel, + model: resolveSubAgentModel(sub.model, deps), maxTokens: sub.maxTokens ?? deps.defaultMaxTokens, maxIterations: sub.maxIterations ?? deps.defaultMaxIterations, systemPrompt, @@ -124,6 +128,49 @@ export function buildSubAgentDomainTools( return tools; } +/** + * Resolve a sub-agent's persisted `model` ref to the active provider's concrete + * bare `modelId` (issue #296 MAJOR 3). `LocalSubAgent` sends `model` RAW to the + * provider adapter, exactly like the orchestrator main loop — so a picker-stored + * provider-qualified id (`anthropic:claude-opus-4-8`) or alias (`opus`) would + * 404 every delegated turn. Shares the resolver with the orchestrator path + * (`resolveModelIdForProvider`); an empty / unknown / cross-provider ref + * degrades to the parent's default model instead of 404ing. The CLI sub-agent + * path keeps its own alias scheme (`cliModelAlias`) and is not routed here. + */ +export function resolveSubAgentModel( + ref: string | null | undefined, + deps: Pick, +): string { + return ( + resolveModelIdForProvider(ref, deps.provider?.id) ?? deps.defaultModel + ); +} + +/** + * CLI-provider variant of `resolveSubAgentModel` (issue #296 BLOCKER). The CLI + * sub-agent runs the picked ref through `cliModelAlias` (which strips the `-cli` + * suffix back to `opus`/`sonnet`/`haiku`), so the ref must first be resolved to + * the bare CLI `modelId` — a picker-stored `claude-cli:opus-cli` must become + * `opus-cli` (→ `opus`), NOT reach the strip raw (→ the invalid `claude-cli:opus`). + * + * Resolves against `claude-cli` explicitly (the caller is inside the + * `hostIsCliProvider` branch and `deps.provider` may be absent on the hydrate + * path). A ref that does not resolve under `claude-cli` — a legacy bare alias + * (`opus`) or full vendor id — falls through untouched so `cliModelAlias` + * handles it exactly as before; an empty ref inherits the parent default. + * Mirrors the orchestrator main model's overlay resolution in `applyDiff`. + */ +export function resolveCliSubAgentModel( + ref: string | null | undefined, + defaultModel: string, +): string { + return ( + (resolveModelIdForProvider(ref, 'claude-cli') ?? ref?.trim()) || + defaultModel + ); +} + function resolveSubAgentTools( grants: readonly ToolGrantRow[], deps: SubAgentToolDeps, diff --git a/middleware/src/index.ts b/middleware/src/index.ts index 59756490..842769c0 100644 --- a/middleware/src/index.ts +++ b/middleware/src/index.ts @@ -2024,6 +2024,20 @@ async function main(): Promise { '[middleware] operator-channels endpoints ready at /api/v1/operator/channels/* (auth-gated)', ); + // The orchestrator's single configured LLM provider id, live-read from the + // installed `@omadia/orchestrator` config so a runtime provider switch is + // picked up without a restart (mirrors `hostProviderId` / the sub-agent + // hydrate reader). Default `anthropic`. Shared by the operator model-write + // scoping and the builder model picker (issue #296). + const orchestratorActiveProviderId = (): string => { + const raw = installedRegistry.get('@omadia/orchestrator')?.config?.[ + 'llm_provider' + ]; + return typeof raw === 'string' && raw.trim().length > 0 + ? raw.trim() + : 'anthropic'; + }; + // Agent Builder canvas backend (P1/P2). Mounted at the /api/v1/operator // parent so the /agents/:slug/graph|subagents|… subpaths fall through here // after the operator-agents router. 503s without a graphPool (in-memory KG @@ -2039,6 +2053,10 @@ async function main(): Promise { graphPool ? new AgentGraphStore(graphPool) : undefined, getRegistry: () => serviceRegistry.get('orchestratorRegistry'), + // Live-read the orchestrator's single configured provider so per-Agent / + // sub-agent model writes are scoped to it (issue #296 — a cross-provider + // pick is rejected instead of silently dropped at build). + getActiveProvider: orchestratorActiveProviderId, }), ); console.log( @@ -3270,6 +3288,9 @@ async function main(): Promise { store: draftStore, quota: draftQuota, connectedProviders: builderConnectedProviders, + // Scope the model picker to the orchestrator's active provider so a + // cross-provider pick can't be offered (issue #296). + activeProvider: orchestratorActiveProviderId, preview: { draftStore, previewCache, diff --git a/middleware/src/routes/agentBuilder.ts b/middleware/src/routes/agentBuilder.ts index a8e137d2..db7659f2 100644 --- a/middleware/src/routes/agentBuilder.ts +++ b/middleware/src/routes/agentBuilder.ts @@ -33,6 +33,11 @@ export interface AgentBuilderRouterOptions { readonly getConfigStore: () => ConfigStore | undefined; readonly getGraphStore: () => AgentGraphStore | undefined; readonly getRegistry: () => OrchestratorRegistry | undefined; + /** The orchestrator's single configured LLM provider id (live-read from the + * installed `@omadia/orchestrator` config, default `anthropic`). Scopes + * per-Agent / sub-agent model writes to this provider so a cross-provider + * pick is rejected instead of silently dropped at build (issue #296). */ + readonly getActiveProvider?: () => string | undefined; } interface Live { @@ -87,7 +92,7 @@ export function createAgentBuilderRouter( l.graph.listSchedulesForAgent(agent.id), ]); res.json( - assembleGraph(agent, bindings, subAgents, skills, grants, servers, schedules), + assembleGraph(agent, bindings, subAgents, skills, grants, servers, schedules, l.registry), ); } catch (err) { fail(res, err); @@ -133,17 +138,20 @@ export function createAgentBuilderRouter( const agent = await agentOr404(l, str(req.params.slug), res); if (!agent) return; const b = req.body ?? {}; - const row = await l.graph.createSubAgent({ - parentAgentId: agent.id, - name: String(b.name ?? '').trim(), - skillId: b.skillId ?? null, - model: b.model ?? null, - maxTokens: b.maxTokens ?? null, - maxIterations: b.maxIterations ?? null, - systemPromptOverride: b.systemPromptOverride ?? null, - status: b.status ?? 'enabled', - position: b.position ?? null, - }); + const row = await l.graph.createSubAgent( + { + parentAgentId: agent.id, + name: String(b.name ?? '').trim(), + skillId: b.skillId ?? null, + model: b.model ?? null, + maxTokens: b.maxTokens ?? null, + maxIterations: b.maxIterations ?? null, + systemPromptOverride: b.systemPromptOverride ?? null, + status: b.status ?? 'enabled', + position: b.position ?? null, + }, + options.getActiveProvider?.(), + ); await reload(l); res.json(subAgentNode(row)); } catch (err) { @@ -157,7 +165,11 @@ export function createAgentBuilderRouter( const l = live(res); if (!l) return; try { - const row = await l.graph.updateSubAgent(str(req.params.id), req.body ?? {}); + const row = await l.graph.updateSubAgent( + str(req.params.id), + req.body ?? {}, + options.getActiveProvider?.(), + ); await reload(l); res.json(subAgentNode(row)); } catch (err) { @@ -191,9 +203,13 @@ export function createAgentBuilderRouter( const agent = await agentOr404(l, str(req.params.slug), res); if (!agent) return; const routing = (req.body ?? {}).modelRouting ?? null; - const updated = await l.config.setModelRouting(agent.id, routing); + const updated = await l.config.setModelRouting( + agent.id, + routing, + options.getActiveProvider?.(), + ); await reload(l); - res.json(agentNode(updated)); + res.json(agentNode(updated, l.registry)); } catch (err) { fail(res, err); } @@ -475,6 +491,7 @@ function assembleGraph( grants: readonly ToolGrantRow[], servers: readonly McpServerRow[], schedules: readonly ScheduleRow[], + registry: OrchestratorRegistry | undefined, ) { const mySubs = subAgents.filter((s) => s.parentAgentId === agent.id); const subIds = new Set(mySubs.map((s) => s.id)); @@ -527,7 +544,7 @@ function assembleGraph( } return { - agent: agentNode(agent), + agent: agentNode(agent, registry), channels: bindings.map((b) => ({ channelType: b.channelType, channelKey: b.channelKey, @@ -544,7 +561,18 @@ function assembleGraph( // ── node mappers ───────────────────────────────────────────────────────────── -function agentNode(a: AgentRow) { +/** + * Map an `AgentRow` to the canvas `agent` node payload. Exported for unit + * tests so the `effectiveModel` surface stays covered without spinning up + * the express app. + */ +export function agentNode(a: AgentRow, registry: OrchestratorRegistry | undefined) { + // Issue #296 acceptance #4 — surface the orchestrator model the registry + // actually resolved for this Agent (per-Agent overlay applied to the + // platform default). Absent when the registry has not yet built the Agent + // (in-memory bootstrap / Agent disabled); UI then shows just the persisted + // `modelRouting.main` as a hint. + const built = registry?.get(a.slug)?.built; return { id: a.id, slug: a.slug, @@ -553,6 +581,7 @@ function agentNode(a: AgentRow) { privacyProfile: a.privacyProfile, status: a.status, modelRouting: (a.modelRouting as Record | null) ?? null, + effectiveModel: built?.effectiveModel ?? null, position: a.canvasPosition ?? null, }; } diff --git a/middleware/src/routes/builder.ts b/middleware/src/routes/builder.ts index dbb475f0..8fdb1df0 100644 --- a/middleware/src/routes/builder.ts +++ b/middleware/src/routes/builder.ts @@ -126,6 +126,11 @@ export interface BuilderRouterDeps { * key is configured, or the provider is keyless). When omitted, the model * picker lists every registered model regardless of connection. */ connectedProviders?: () => Promise>; + /** The orchestrator's single configured provider id (live-read). When set, + * the model picker is scoped to it so a cross-provider pick can't be offered + * (it would be silently dropped to the platform default at build — issue + * #296). When omitted, the picker falls back to `connectedProviders`. */ + activeProvider?: () => string | undefined; } export function createBuilderRouter(deps: BuilderRouterDeps): Router { @@ -288,13 +293,24 @@ export function createBuilderRouter(deps: BuilderRouterDeps): Router { // ── GET /models ───────────────────────────────────────────────────────── router.get('/models', async (_req: Request, res: Response) => { try { + // Scope to the orchestrator's active provider when known: a per-Agent + // pick on another provider would be dropped to the platform default at + // build (issue #296), so it must not be offered. Fall back to the + // connected-provider set (all keyed providers) when the active provider + // is unknown. + const active = deps.activeProvider?.(); const connected = deps.connectedProviders ? await deps.connectedProviders() : null; const models = BuilderModelRegistry.list() - .filter((m) => connected === null || connected.has(m.provider)) + .filter((m) => + active + ? m.provider === active + : connected === null || connected.has(m.provider), + ) .map((m) => ({ id: m.id, + model_id: m.modelId, label: m.label, provider: m.provider, model_class: m.modelClass, diff --git a/middleware/test/agentBuilderAgentNode.test.ts b/middleware/test/agentBuilderAgentNode.test.ts new file mode 100644 index 00000000..c409745e --- /dev/null +++ b/middleware/test/agentBuilderAgentNode.test.ts @@ -0,0 +1,87 @@ +/** + * Route surface — `agentNode()` shape on `GET /agents/:slug/graph` (and + * the `setModelRouting` response). Issue #296 acceptance #4: the registry's + * resolved orchestrator model must be reachable from the Admin UI without + * a kernel-side restart. + * + * `agentNode` is the route helper that builds the `agent` node payload from + * an `AgentRow` and the live registry. Tested directly so we don't need to + * spin up express or a real Postgres pool. + */ + +import assert from 'node:assert/strict'; +import { test } from 'node:test'; + +import type { + AgentRow, + BuiltOrchestrator, + OrchestratorRegistry, +} from '@omadia/orchestrator'; + +import { agentNode } from '../src/routes/agentBuilder.js'; + +const AGENT_ID = '00000000-0000-0000-0000-000000000001'; + +function agentRow(overrides: Partial = {}): AgentRow { + return { + id: AGENT_ID, + slug: 'public', + name: 'Public', + description: null, + privacyProfile: 'default', + status: 'enabled', + createdAt: new Date(0), + updatedAt: new Date(0), + ...overrides, + }; +} + +/** Minimal `OrchestratorRegistry` stub — `agentNode` only calls `.get(slug)`. */ +function fakeRegistry( + slug: string, + built: Partial, +): OrchestratorRegistry { + return { + get: (s: string) => (s === slug ? { built } : undefined), + } as unknown as OrchestratorRegistry; +} + +test('effectiveModel is null when no registry is available (in-memory boot / disabled agent)', () => { + const node = agentNode(agentRow(), undefined); + assert.equal(node.effectiveModel, null); + assert.equal(node.slug, 'public'); + assert.equal(node.modelRouting, null); +}); + +test('effectiveModel is null when the registry has no built entry for the agent', () => { + // E.g. the agent is disabled, or hot-reload has not finished — the registry + // is published but `.get(slug)` returns undefined. The UI then shows just + // the persisted `modelRouting.main` hint, not a stale "Active: X" badge. + const reg = fakeRegistry('some-other-slug', { effectiveModel: 'haiku' }); + const node = agentNode(agentRow(), reg); + assert.equal(node.effectiveModel, null); +}); + +test('effectiveModel mirrors the registry-resolved model id (per-agent overlay applied)', () => { + const reg = fakeRegistry('public', { effectiveModel: 'claude-opus-4-8' }); + const node = agentNode(agentRow(), reg); + assert.equal(node.effectiveModel, 'claude-opus-4-8'); +}); + +test('persisted modelRouting passes through verbatim alongside effectiveModel', () => { + // setModelRouting -> agentNode echoes the persisted JSON so the Inspector + // hydrates form state from the response without a second GET. + const reg = fakeRegistry('public', { effectiveModel: 'claude-haiku-4-5' }); + const node = agentNode( + agentRow({ + modelRouting: { mode: 'triage', main: 'claude-opus-4-8', triage: 'claude-haiku-4-5' }, + }), + reg, + ); + assert.deepEqual(node.modelRouting, { + mode: 'triage', + main: 'claude-opus-4-8', + triage: 'claude-haiku-4-5', + }); + assert.equal(node.effectiveModel, 'claude-haiku-4-5'); +}); diff --git a/middleware/test/agentBuilderModelRouting.test.ts b/middleware/test/agentBuilderModelRouting.test.ts index 6d255e33..7cffefbe 100644 --- a/middleware/test/agentBuilderModelRouting.test.ts +++ b/middleware/test/agentBuilderModelRouting.test.ts @@ -37,7 +37,9 @@ test('triage mode → runtime routing with classifier/simple/complex', () => { test('triage without explicit simple defaults simple→main; missing triage→haiku', () => { const r = resolveAgentModelRouting({ mode: 'triage', main: 'claude-opus-4-8' }); assert.deepEqual(r.modelRouting, { - classifierModel: 'claude-haiku-4-5', + // DEFAULT_CLASSIFIER_MODEL is the dated, registry-served id (issue #296 + // nit) so the code's default agrees with its own write-validation. + classifierModel: 'claude-haiku-4-5-20251001', simpleModel: 'claude-opus-4-8', complexModel: 'claude-opus-4-8', }); diff --git a/middleware/test/agentGraphStoreSubAgentModelValidation.test.ts b/middleware/test/agentGraphStoreSubAgentModelValidation.test.ts new file mode 100644 index 00000000..6b6777c5 --- /dev/null +++ b/middleware/test/agentGraphStoreSubAgentModelValidation.test.ts @@ -0,0 +1,255 @@ +/** + * Validation guard on sub-agent `model` writes (issue #296 follow-up). + * + * `createSubAgent` / `updateSubAgent` accept a free-form `model: string | null`. + * `null` (or empty) means "inherit parent agent" and skips validation; a + * non-empty value must resolve via `@omadia/llm-provider` for the same reason + * orchestrator routing does — an unknown id would 404 at every turn the + * sub-agent runs. + * + * Pool is stubbed: the validator throws BEFORE any SQL is sent, so unknown ids + * never reach `INSERT`/`UPDATE`. The stub also asserts the inverse — accepted + * writes DO reach SQL. + */ + +import assert from 'node:assert/strict'; +import { beforeEach, test } from 'node:test'; + +import type { Pool } from 'pg'; + +import { + clearExternalModels, + registerExternalModels, + type ModelInfo, +} from '@omadia/llm-provider'; + +import { AgentGraphStore } from '../packages/harness-orchestrator/src/registry/agentGraphStore.js'; +import { ConfigValidationError } from '../packages/harness-orchestrator/src/registry/configStore.js'; + +const OPUS: ModelInfo = { + id: 'anthropic:claude-opus-4-8', + provider: 'anthropic', + modelId: 'claude-opus-4-8', + label: 'Claude Opus 4.8', + class: 'frontier', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['opus'], +}; + +const GPT: ModelInfo = { + id: 'openai:gpt-5.5', + provider: 'openai', + modelId: 'gpt-5.5', + label: 'GPT-5.5', + class: 'frontier', + maxTokens: 16384, + contextWindow: 400000, + vision: true, + aliases: [], +}; + +beforeEach(() => { + clearExternalModels(); + registerExternalModels([OPUS, GPT]); +}); + +interface QueryCall { + sql: string; + params: unknown[]; +} + +/** Capture every `query()` call; return a deterministic `SubAgentDbRow`-shaped + * result so `mapSubAgent` succeeds and the call site doesn't crash on `rows[0]`. */ +function fakePool(): { pool: Pool; calls: QueryCall[] } { + const calls: QueryCall[] = []; + const pool = { + query: async (sql: string, params: unknown[]) => { + calls.push({ sql, params }); + return { + rows: [ + { + id: '00000000-0000-0000-0000-0000000000aa', + parent_agent_id: '00000000-0000-0000-0000-000000000001', + name: 'researcher', + skill_id: null, + model: (params[3] ?? params[2] ?? null) as string | null, + max_tokens: null, + max_iterations: null, + system_prompt_override: null, + status: 'enabled', + position: null, + created_at: new Date(0), + updated_at: new Date(0), + }, + ], + }; + }, + } as unknown as Pool; + return { pool, calls }; +} + +test('createSubAgent rejects an unknown model id BEFORE touching the DB', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await assert.rejects( + () => + store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'researcher', + model: 'gpt-99-not-registered', + }), + (err) => + err instanceof ConfigValidationError && + /subAgent\.model 'gpt-99-not-registered'/.test(err.message), + ); + assert.equal(calls.length, 0, 'no SQL was sent for the rejected write'); +}); + +test('createSubAgent accepts a registered model and persists the row', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + const row = await store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'researcher', + model: 'claude-opus-4-8', + }); + assert.equal(row.model, 'claude-opus-4-8'); + assert.equal(calls.length, 1); + assert.match(calls[0]!.sql, /INSERT INTO agent_subagents/); +}); + +test('createSubAgent treats null / empty model as "inherit" — no validation, write fires', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + // null = clear / inherit parent; must NOT trigger the registered-model check. + await store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'r1', + model: null, + }); + // empty string round-trips from the UI dropdown's "(default)" choice — same + // semantic as null per agentGraphStore guard. + await store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'r2', + model: '', + }); + assert.equal(calls.length, 2, 'both writes hit SQL'); +}); + +test('updateSubAgent rejects an unknown model id BEFORE touching the DB', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await assert.rejects( + () => + store.updateSubAgent('00000000-0000-0000-0000-0000000000aa', { + model: 'definitely-not-real', + }), + (err) => + err instanceof ConfigValidationError && + /subAgent\.model 'definitely-not-real'/.test(err.message), + ); + assert.equal(calls.length, 0); +}); + +test('updateSubAgent without a `model` patch skips validation entirely', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await store.updateSubAgent('00000000-0000-0000-0000-0000000000aa', { + name: 'renamed', + }); + assert.equal(calls.length, 1); + assert.match(calls[0]!.sql, /UPDATE agent_subagents/); +}); + +// ── clear path (issue #296 MAJOR#2) ────────────────────────────────────── +// `model = COALESCE($4, model)` can NEVER clear the column — COALESCE(NULL, +// model) keeps the old value. The write must use an explicit "was model in the +// patch?" guard so `null` clears to NULL while `undefined` keeps. + +test('createSubAgent normalises empty-string model to NULL (clean data)', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await store.createSubAgent({ + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'r', + model: ' ', + }); + // INSERT param $4 is the model column — must be NULL, not '' / whitespace. + assert.equal(calls[0]!.params[3], null, 'empty model persisted as NULL'); +}); + +test('updateSubAgent clears model to NULL when patch.model is null', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + const row = await store.updateSubAgent('00000000-0000-0000-0000-0000000000aa', { + model: null, + }); + const { sql, params } = calls[0]!; + // The write must NOT COALESCE the model column (that would keep the old + // value); it must branch on the "provided" guard and write NULL. + assert.doesNotMatch(sql, /model\s+=\s+COALESCE/); + assert.match(sql, /model\s+=\s+CASE WHEN \$10 THEN \$4 ELSE model END/); + assert.equal(params[3], null, 'model value param is NULL'); + assert.equal(params[9], true, 'model-provided guard is true → clears'); + assert.equal(row.model, null); +}); + +test('updateSubAgent clears model to NULL when patch.model is empty string', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await store.updateSubAgent('00000000-0000-0000-0000-0000000000aa', { + model: '', + }); + assert.equal(calls[0]!.params[3], null); + assert.equal(calls[0]!.params[9], true); +}); + +test('updateSubAgent keeps model untouched when patch omits it (undefined)', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await store.updateSubAgent('00000000-0000-0000-0000-0000000000aa', { + name: 'renamed', + }); + // model-provided guard is false → the CASE keeps the existing column. + assert.equal(calls[0]!.params[9], false, 'model-provided guard is false → keeps'); +}); + +// ── active-provider scoping (issue #296 MAJOR#3) ───────────────────────── + +test('createSubAgent rejects a cross-provider model when activeProvider is set', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await assert.rejects( + () => + store.createSubAgent( + { + parentAgentId: '00000000-0000-0000-0000-000000000001', + name: 'researcher', + model: 'openai:gpt-5.5', + }, + 'anthropic', + ), + (err) => + err instanceof ConfigValidationError && /cross-provider/.test(err.message), + ); + assert.equal(calls.length, 0, 'no SQL for a rejected cross-provider write'); +}); + +test('updateSubAgent rejects a cross-provider model when activeProvider is set', async () => { + const { pool, calls } = fakePool(); + const store = new AgentGraphStore(pool); + await assert.rejects( + () => + store.updateSubAgent( + '00000000-0000-0000-0000-0000000000aa', + { model: 'openai:gpt-5.5' }, + 'anthropic', + ), + (err) => + err instanceof ConfigValidationError && /cross-provider/.test(err.message), + ); + assert.equal(calls.length, 0); +}); diff --git a/middleware/test/configStoreModelRoutingValidation.test.ts b/middleware/test/configStoreModelRoutingValidation.test.ts new file mode 100644 index 00000000..3fa51633 --- /dev/null +++ b/middleware/test/configStoreModelRoutingValidation.test.ts @@ -0,0 +1,259 @@ +/** + * Validation guard on `model_routing` writes (issue #296). + * + * `setModelRouting` accepts a `{ mode, main, triage?, simple? }` JSON. Every + * model id must resolve via `@omadia/llm-provider`; an unknown id would crash + * every turn at runtime with `404 not_found_error`. The shape validator is a + * pure function so we can test it without a Postgres pool. + */ + +import assert from 'node:assert/strict'; +import { beforeEach, test } from 'node:test'; + +import { + clearExternalModels, + registerExternalModels, + type ModelInfo, +} from '@omadia/llm-provider'; + +import { + ConfigValidationError, + validateModelRef, + validateModelRoutingShape, +} from '../packages/harness-orchestrator/src/registry/configStore.js'; + +const OPUS: ModelInfo = { + id: 'anthropic:claude-opus-4-8', + provider: 'anthropic', + modelId: 'claude-opus-4-8', + label: 'Claude Opus 4.8', + class: 'frontier', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['opus'], +}; + +const HAIKU: ModelInfo = { + id: 'anthropic:claude-haiku-4-5', + provider: 'anthropic', + modelId: 'claude-haiku-4-5', + label: 'Claude Haiku 4.5', + class: 'fast', + maxTokens: 8192, + contextWindow: 200000, + vision: true, + aliases: ['haiku'], +}; + +const SONNET: ModelInfo = { + id: 'anthropic:claude-sonnet-4-6', + provider: 'anthropic', + modelId: 'claude-sonnet-4-6', + label: 'Claude Sonnet 4.6', + class: 'balanced', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['sonnet'], +}; + +const GPT: ModelInfo = { + id: 'openai:gpt-5.5', + provider: 'openai', + modelId: 'gpt-5.5', + label: 'GPT-5.5', + class: 'frontier', + maxTokens: 16384, + contextWindow: 400000, + vision: true, + aliases: [], +}; + +beforeEach(() => { + clearExternalModels(); + registerExternalModels([OPUS, HAIKU, SONNET, GPT]); +}); + +test('accepts single mode with a registered model', () => { + validateModelRoutingShape({ mode: 'single', main: 'claude-opus-4-8' }); + validateModelRoutingShape({ mode: 'single', main: 'anthropic:claude-opus-4-8' }); + validateModelRoutingShape({ mode: 'single', main: 'opus' }); // alias +}); + +test('accepts triage mode with registered classifier + simple', () => { + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + triage: 'claude-haiku-4-5', + simple: 'claude-sonnet-4-6', + }); +}); + +test('triage mode tolerates omitted/empty triage + simple (run-time defaults)', () => { + validateModelRoutingShape({ mode: 'triage', main: 'claude-opus-4-8' }); + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + triage: '', + simple: '', + }); +}); + +test('rejects unknown main model', () => { + assert.throws( + () => validateModelRoutingShape({ mode: 'single', main: 'made-up-model-9' }), + (err) => + err instanceof ConfigValidationError && + /modelRouting\.main 'made-up-model-9'/.test(err.message), + ); +}); + +test('rejects unknown triage / simple in triage mode', () => { + assert.throws( + () => + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + triage: 'no-such-classifier', + }), + (err) => + err instanceof ConfigValidationError && + /modelRouting\.triage 'no-such-classifier'/.test(err.message), + ); + assert.throws( + () => + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + simple: 'no-such-simple', + }), + (err) => + err instanceof ConfigValidationError && + /modelRouting\.simple 'no-such-simple'/.test(err.message), + ); +}); + +test('rejects missing or empty main', () => { + assert.throws( + () => validateModelRoutingShape({ mode: 'single' } as Record), + (err) => err instanceof ConfigValidationError && /main is required/.test(err.message), + ); + assert.throws( + () => validateModelRoutingShape({ mode: 'single', main: ' ' }), + (err) => err instanceof ConfigValidationError && /main is required/.test(err.message), + ); +}); + +test('rejects unknown mode', () => { + assert.throws( + () => + validateModelRoutingShape({ + mode: 'route-everything', + main: 'claude-opus-4-8', + }), + (err) => err instanceof ConfigValidationError && /mode must be/.test(err.message), + ); +}); + +test('rejects non-string triage / simple', () => { + assert.throws( + () => + validateModelRoutingShape({ + mode: 'triage', + main: 'claude-opus-4-8', + triage: 42, + }), + (err) => + err instanceof ConfigValidationError && /triage must be a string/.test(err.message), + ); +}); + +// ── validateModelRef (sub-agent guard) ────────────────────────────────── + +test('validateModelRef accepts a registered id (raw + alias + provider-qualified)', () => { + validateModelRef('subAgent.model', 'claude-opus-4-8'); + validateModelRef('subAgent.model', 'opus'); + validateModelRef('subAgent.model', 'anthropic:claude-opus-4-8'); +}); + +test('validateModelRef rejects an unknown id and tags the field', () => { + assert.throws( + () => validateModelRef('subAgent.model', 'no-such-model'), + (err) => + err instanceof ConfigValidationError && + /subAgent\.model 'no-such-model'/.test(err.message), + ); +}); + +test('validateModelRef rejects empty / whitespace (callers should skip instead)', () => { + assert.throws( + () => validateModelRef('subAgent.model', ''), + (err) => + err instanceof ConfigValidationError && + /must be a non-empty model ref/.test(err.message), + ); + assert.throws( + () => validateModelRef('subAgent.model', ' '), + (err) => + err instanceof ConfigValidationError && + /must be a non-empty model ref/.test(err.message), + ); +}); + +// ── active-provider scoping (issue #296 MAJOR#3) ───────────────────────── +// The picker lists every connected provider's models, so a cross-provider ref +// (`openai:gpt-5.5` under an anthropic orchestrator) can reach the validator. +// At build it resolves to the wrong provider and is silently dropped to the +// platform default — the Agent never runs on the picked model. Reject it at +// write time so the operator gets an error instead of a phantom pick. + +test('validateModelRef with activeProvider accepts a same-provider ref', () => { + validateModelRef('subAgent.model', 'claude-opus-4-8', 'anthropic'); + validateModelRef('subAgent.model', 'opus', 'anthropic'); + validateModelRef('subAgent.model', 'anthropic:claude-opus-4-8', 'anthropic'); + validateModelRef('subAgent.model', 'gpt-5.5', 'openai'); +}); + +test('validateModelRef with activeProvider rejects a cross-provider ref', () => { + assert.throws( + () => validateModelRef('subAgent.model', 'openai:gpt-5.5', 'anthropic'), + (err) => + err instanceof ConfigValidationError && + /resolves to provider 'openai'/.test(err.message) && + /orchestrator runs on 'anthropic'/.test(err.message), + ); +}); + +test('validateModelRef without activeProvider stays provider-agnostic (legacy)', () => { + // No active provider → the cross-provider guard is off; a registered ref + // passes regardless of provider (pre-#296 behaviour). + validateModelRef('subAgent.model', 'openai:gpt-5.5'); +}); + +test('validateModelRoutingShape with activeProvider rejects a cross-provider main', () => { + assert.throws( + () => + validateModelRoutingShape( + { mode: 'single', main: 'openai:gpt-5.5' }, + 'anthropic', + ), + (err) => + err instanceof ConfigValidationError && + /modelRouting\.main 'openai:gpt-5.5'/.test(err.message) && + /cross-provider/.test(err.message), + ); +}); + +test('validateModelRoutingShape with activeProvider rejects a cross-provider triage sub-model', () => { + assert.throws( + () => + validateModelRoutingShape( + { mode: 'triage', main: 'claude-opus-4-8', triage: 'openai:gpt-5.5' }, + 'anthropic', + ), + (err) => + err instanceof ConfigValidationError && + /modelRouting\.triage 'openai:gpt-5.5'/.test(err.message), + ); +}); diff --git a/middleware/test/orchestratorRegistry.test.ts b/middleware/test/orchestratorRegistry.test.ts index aebd374d..8da6e82b 100644 --- a/middleware/test/orchestratorRegistry.test.ts +++ b/middleware/test/orchestratorRegistry.test.ts @@ -16,10 +16,15 @@ * NOT prevent the other Agents from coming up. */ -import { test } from 'node:test'; +import { beforeEach, test } from 'node:test'; import assert from 'node:assert/strict'; import Anthropic from '@anthropic-ai/sdk'; +import { + clearExternalModels, + registerExternalModels, + type ModelInfo, +} from '@omadia/llm-provider'; import { InMemoryNudgeRegistry } from '@omadia/plugin-api'; import type { EntityRefBus, @@ -40,6 +45,7 @@ import { validateSnapshot, type PluginCapabilityLookup, } from '../packages/harness-orchestrator/src/registry/index.js'; +import { DEFAULT_ORCHESTRATOR_MODEL } from '../packages/harness-orchestrator/src/registry/agentRuntime.js'; function fakeNativeToolRegistry(): NativeToolRegistry { const names = new Set(); @@ -65,6 +71,14 @@ function deps(): OrchestratorDeps { }; } +/** `deps()` with a concrete provider id so the build path can pin model-ref + * resolution to a single provider (and drop cross-provider picks). The + * orchestrator only touches the provider at turn time, so a stub id suffices + * for build-time resolution assertions. */ +function depsWithProvider(id: string): OrchestratorDeps { + return { ...deps(), provider: { id } as OrchestratorDeps['provider'] }; +} + function fakeStore(snapshot: ConfigSnapshot): ConfigStore { return { loadSnapshot: () => Promise.resolve(snapshot), @@ -84,6 +98,61 @@ function agentRow(slug: string, id: string, status: AgentRow['status'] = 'enable }; } +// The build path resolves a per-Agent model ref to the active provider's +// concrete `modelId` via `@omadia/llm-provider`. Register the ids the tests +// pin so resolution lands on them instead of dropping to the platform default. +const OPUS: ModelInfo = { + id: 'anthropic:claude-opus-4-8', + provider: 'anthropic', + modelId: 'claude-opus-4-8', + label: 'Claude Opus 4.8', + class: 'frontier', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['opus'], +}; +const HAIKU: ModelInfo = { + id: 'anthropic:claude-haiku-4-5', + provider: 'anthropic', + modelId: 'claude-haiku-4-5', + label: 'Claude Haiku 4.5', + class: 'fast', + maxTokens: 8192, + contextWindow: 200000, + vision: true, + aliases: ['haiku'], +}; +const GPT: ModelInfo = { + id: 'openai:gpt-5.5', + provider: 'openai', + modelId: 'gpt-5.5', + label: 'GPT-5.5', + class: 'frontier', + maxTokens: 16384, + contextWindow: 400000, + vision: true, + aliases: [], +}; +// The subscription-CLI provider registers `-cli`-suffixed modelIds so they +// never collide with another provider's alias; the claude-cli adapter strips +// the suffix back to the bare CLI alias (`opus`) for `claude -p --model`. +const OPUS_CLI: ModelInfo = { + id: 'claude-cli:opus-cli', + provider: 'claude-cli', + modelId: 'opus-cli', + label: 'Claude Opus (CLI)', + class: 'frontier', + maxTokens: 32000, + contextWindow: 200000, + vision: false, +}; + +beforeEach(() => { + clearExternalModels(); + registerExternalModels([OPUS, HAIKU, GPT, OPUS_CLI]); +}); + const twoAgentSnapshot: ConfigSnapshot = { agents: [ agentRow('public', '00000000-0000-0000-0000-000000000001'), @@ -386,3 +455,317 @@ test('SC-007 / T018: a build-time failure for Agent B does NOT prevent Agent A', // second build trip was caught and logged, not propagated. assert.ok(registry.size() >= 1, 'at least one agent survived isolation'); }); + +test('issue #296 acceptance #4: `agent added` log includes effectiveModel + effectiveModelRouting', async () => { + // Per-agent overlay: agent persists `model_routing.main = haiku` so the + // registry must build it on haiku instead of the platform default `m-default`. + // The boot log line is the operator-visible surface for that resolution. + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('public', '00000000-0000-0000-0000-000000000001'), + modelRouting: { mode: 'single', main: 'claude-haiku-4-5' }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const logged: Array<{ msg: string; fields?: Record }> = []; + const registry = new OrchestratorRegistry(fakeStore(snapshot), deps(), { + defaultRuntimeConfig: { model: 'm-default', maxTokens: 100, maxToolIterations: 4 }, + log: (msg, fields) => logged.push({ msg, ...(fields ? { fields } : {}) }), + }); + await registry.start(); + + const added = logged.find((l) => l.msg === 'registry: agent added'); + assert.ok(added, '`agent added` log was emitted'); + assert.equal(added.fields?.['slug'], 'public'); + assert.equal( + added.fields?.['effectiveModel'], + 'claude-haiku-4-5', + 'effectiveModel reflects per-agent overlay, not platform default', + ); + + // Triage overlay also surfaces effectiveModelRouting so an operator can + // confirm per-turn routing is wired without reading the orchestrator state. + const triageSnap: ConfigSnapshot = { + ...snapshot, + agents: [ + { + ...agentRow('triaged', '00000000-0000-0000-0000-000000000099'), + modelRouting: { mode: 'triage', main: 'claude-opus-4-8' }, + }, + ], + }; + logged.length = 0; + const r2 = new OrchestratorRegistry(fakeStore(triageSnap), deps(), { + defaultRuntimeConfig: { model: 'm-default', maxTokens: 100, maxToolIterations: 4 }, + log: (msg, fields) => logged.push({ msg, ...(fields ? { fields } : {}) }), + }); + await r2.start(); + const addedTriage = logged.find((l) => l.msg === 'registry: agent added'); + assert.equal(addedTriage?.fields?.['effectiveModel'], 'claude-opus-4-8'); + assert.ok( + addedTriage?.fields?.['effectiveModelRouting'], + 'effectiveModelRouting present in log when agent is on triage mode', + ); +}); + +test('issue #296 acceptance #4: `agent rebuilt` log includes the new effectiveModel after a routing change', async () => { + // Hot-apply path: a routing edit triggers a rebuild (see + // `runtimeChangeReasons:model_routing`). The rebuild log must carry the + // freshly-resolved effectiveModel so the operator can confirm the change + // landed without restarting. + const before: ConfigSnapshot = { + agents: [ + { + ...agentRow('public', '00000000-0000-0000-0000-000000000001'), + modelRouting: { mode: 'single', main: 'claude-haiku-4-5' }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const after: ConfigSnapshot = { + ...before, + agents: [ + { + ...agentRow('public', '00000000-0000-0000-0000-000000000001'), + modelRouting: { mode: 'single', main: 'claude-opus-4-8' }, + }, + ], + }; + let current: ConfigSnapshot = before; + const store: ConfigStore = { + loadSnapshot: () => Promise.resolve(current), + } as unknown as ConfigStore; + const logged: Array<{ msg: string; fields?: Record }> = []; + const registry = new OrchestratorRegistry(store, deps(), { + defaultRuntimeConfig: { model: 'm-default', maxTokens: 100, maxToolIterations: 4 }, + log: (msg, fields) => logged.push({ msg, ...(fields ? { fields } : {}) }), + }); + await registry.start(); + current = after; + await registry.reload(); + + const rebuilt = logged.find((l) => l.msg === 'registry: agent rebuilt'); + assert.ok(rebuilt, '`agent rebuilt` log was emitted on reload'); + assert.equal(rebuilt.fields?.['effectiveModel'], 'claude-opus-4-8'); + assert.match( + String(rebuilt.fields?.['reason']), + /model_routing/, + 'rebuild reason cites the routing change', + ); +}); + +test('issue #296 AC#2: per-instance model resolution is 3-tier (per-Agent → platform default → DEFAULT_ORCHESTRATOR_MODEL)', async () => { + // Tier 1 (per-Agent overlay) wins over the platform default; an Agent with no + // overlay falls to tier 2 (platform default); a blank platform default falls + // to tier 3 (DEFAULT_ORCHESTRATOR_MODEL) so the turn loop never gets an empty + // model id (which would 404 on every turn). + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('overridden', '00000000-0000-0000-0000-000000000001'), + modelRouting: { mode: 'single', main: 'claude-haiku-4-5' }, + }, + // No modelRouting → inherits the platform default (tier 2). + agentRow('inherits', '00000000-0000-0000-0000-000000000002'), + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + + // Tier 1 + tier 2 with a real platform default. + const r1 = new OrchestratorRegistry(fakeStore(snapshot), deps(), { + defaultRuntimeConfig: { model: 'platform-default', maxTokens: 100, maxToolIterations: 4 }, + }); + await r1.start(); + assert.equal( + r1.get('overridden')?.built.effectiveModel, + 'claude-haiku-4-5', + 'tier 1: per-Agent overlay wins', + ); + assert.equal( + r1.get('inherits')?.built.effectiveModel, + 'platform-default', + 'tier 2: no overlay falls to the platform default', + ); + + // Tier 3: a blank platform default + no overlay falls to the hard fallback. + const r2 = new OrchestratorRegistry(fakeStore(snapshot), deps(), { + defaultRuntimeConfig: { model: ' ', maxTokens: 100, maxToolIterations: 4 }, + }); + await r2.start(); + assert.equal( + r2.get('inherits')?.built.effectiveModel, + DEFAULT_ORCHESTRATOR_MODEL, + 'tier 3: blank platform default falls to DEFAULT_ORCHESTRATOR_MODEL', + ); +}); + +test('issue #296 BLOCKER: per-Agent model ref resolves to the active provider bare modelId (never sent raw)', async () => { + // The Admin picker writes a provider-qualified id; aliases are also valid. + // The orchestrator sends `model` RAW to a single concrete adapter, so the + // build path must resolve the overlay to the bare vendor `modelId` — sending + // `anthropic:claude-opus-4-8` or `opus` raw 404s every turn. + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('qualified', '00000000-0000-0000-0000-0000000000a1'), + modelRouting: { mode: 'single', main: 'anthropic:claude-opus-4-8' }, + }, + { + ...agentRow('alias', '00000000-0000-0000-0000-0000000000a2'), + modelRouting: { mode: 'single', main: 'opus' }, + }, + { + // triage sub-models are sent raw too — they must resolve as well. + ...agentRow('triaged', '00000000-0000-0000-0000-0000000000a3'), + modelRouting: { + mode: 'triage', + main: 'anthropic:claude-opus-4-8', + triage: 'haiku', + simple: 'anthropic:claude-haiku-4-5', + }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const registry = new OrchestratorRegistry( + fakeStore(snapshot), + depsWithProvider('anthropic'), + { defaultRuntimeConfig: { model: 'claude-opus-4-8', maxTokens: 100, maxToolIterations: 4 } }, + ); + await registry.start(); + + assert.equal( + registry.get('qualified')?.built.effectiveModel, + 'claude-opus-4-8', + 'provider-qualified id resolves to the bare modelId', + ); + assert.equal( + registry.get('alias')?.built.effectiveModel, + 'claude-opus-4-8', + 'legacy alias resolves to the bare modelId', + ); + const triaged = registry.get('triaged')?.built; + assert.equal(triaged?.effectiveModel, 'claude-opus-4-8'); + assert.deepEqual( + triaged?.effectiveModelRouting, + { + classifierModel: 'claude-haiku-4-5', + simpleModel: 'claude-haiku-4-5', + complexModel: 'claude-opus-4-8', + }, + 'every per-turn routing sub-model is resolved to a bare modelId', + ); +}); + +test('issue #296 FIX2: a cross-provider per-Agent pick is guarded at write time, not silently dropped at build', async () => { + // The orchestrator runs on ONE provider (`anthropic` here). A cross-provider + // pick (`openai:gpt-5.5`) is REJECTED at write time by the provider-scoped + // model-routing validator (see configStoreModelRoutingValidation.test.ts), so + // it can never be persisted for a fresh write. The build path itself does not + // re-drop it: an unresolved ref falls through raw (fail-loud at runtime, no + // silent switch to a different model). This test pins that build-time + // contract for the residual/stale case. + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('crossprovider', '00000000-0000-0000-0000-0000000000b1'), + modelRouting: { mode: 'single', main: 'openai:gpt-5.5' }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const registry = new OrchestratorRegistry( + fakeStore(snapshot), + depsWithProvider('anthropic'), + { defaultRuntimeConfig: { model: 'claude-opus-4-8', maxTokens: 100, maxToolIterations: 4 } }, + ); + await registry.start(); + assert.equal( + registry.get('crossprovider')?.built.effectiveModel, + 'openai:gpt-5.5', + 'a cross-provider ref that escaped write-validation is passed through raw (fails loud), not silently dropped', + ); +}); + +test('issue #296 BLOCKER (claude-cli): a provider-qualified CLI pick resolves to the bare CLI modelId, never sent raw', async () => { + // On a keyless subscription-CLI deployment the picker offers only + // `claude-cli:opus-cli`. If the build passes that ref raw, buildOrchestrator's + // `-cli` strip yields `claude-cli:opus` → an invalid `claude -p --model`, + // failing every turn. The overlay MUST resolve it to the bare `opus-cli` + // (which the downstream strip turns into `opus`, the alias the CLI expects). + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('cli', '00000000-0000-0000-0000-0000000000d1'), + modelRouting: { mode: 'single', main: 'claude-cli:opus-cli' }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const registry = new OrchestratorRegistry( + fakeStore(snapshot), + depsWithProvider('claude-cli'), + { defaultRuntimeConfig: { model: 'opus-cli', maxTokens: 100, maxToolIterations: 4 } }, + ); + await registry.start(); + assert.equal( + registry.get('cli')?.built.effectiveModel, + 'opus-cli', + 'CLI picker id resolves to the bare modelId (not raw claude-cli:opus-cli)', + ); +}); + +test('issue #296 regression: a registry-unknown per-turn classifier is passed through raw, NOT collapsed to the main model', () => { + // The default classifier (`DEFAULT_CLASSIFIER_MODEL`) and any operator-typed + // id may be absent from the curated registry yet still served by the API. + // Resolution must pass it through, not drop it — dropping would silently run + // the cheap-classifier turn on the (expensive) main model. + const undatedClassifier = 'classifier-not-in-registry'; + const snapshot: ConfigSnapshot = { + agents: [ + { + ...agentRow('triaged', '00000000-0000-0000-0000-0000000000c1'), + modelRouting: { + mode: 'triage', + main: 'anthropic:claude-opus-4-8', + triage: undatedClassifier, + }, + }, + ], + agentPlugins: [], + channelBindings: [], + platformSettings: { fallbackAgentId: null, updatedAt: new Date(0) }, + }; + const registry = new OrchestratorRegistry( + fakeStore(snapshot), + depsWithProvider('anthropic'), + { defaultRuntimeConfig: { model: 'claude-opus-4-8', maxTokens: 100, maxToolIterations: 4 } }, + ); + return registry.start().then(() => { + const routing = registry.get('triaged')?.built.effectiveModelRouting; + assert.equal( + routing?.classifierModel, + undatedClassifier, + 'unknown classifier passes through raw', + ); + assert.notEqual( + routing?.classifierModel, + 'claude-opus-4-8', + 'unknown classifier is NOT collapsed to the main model', + ); + }); +}); diff --git a/middleware/test/subAgentModelResolution.test.ts b/middleware/test/subAgentModelResolution.test.ts new file mode 100644 index 00000000..1a612627 --- /dev/null +++ b/middleware/test/subAgentModelResolution.test.ts @@ -0,0 +1,146 @@ +/** + * Sub-agent model-ref resolution (issue #296 MAJOR 3). + * + * `LocalSubAgent` sends its `model` RAW to the provider adapter — exactly like + * the orchestrator main loop. So a picker-stored provider-qualified id or alias + * must be resolved to the active provider's bare `modelId` before it reaches the + * sub-agent, or every delegated turn 404s. `resolveSubAgentModel` is the pure + * resolver the builder uses; tested directly (the resolved id is otherwise + * private on `LocalSubAgent`). + */ + +import assert from 'node:assert/strict'; +import { beforeEach, test } from 'node:test'; + +import { + clearExternalModels, + registerExternalModels, + type LlmProvider, + type ModelInfo, +} from '@omadia/llm-provider'; + +import { + resolveSubAgentModel, + resolveCliSubAgentModel, +} from '../packages/harness-orchestrator/src/registry/subAgentTools.js'; + +const OPUS: ModelInfo = { + id: 'anthropic:claude-opus-4-8', + provider: 'anthropic', + modelId: 'claude-opus-4-8', + label: 'Claude Opus 4.8', + class: 'frontier', + maxTokens: 16384, + contextWindow: 200000, + vision: true, + aliases: ['opus'], +}; +const GPT: ModelInfo = { + id: 'openai:gpt-5.5', + provider: 'openai', + modelId: 'gpt-5.5', + label: 'GPT-5.5', + class: 'frontier', + maxTokens: 16384, + contextWindow: 400000, + vision: true, + aliases: [], +}; +const OPUS_CLI: ModelInfo = { + id: 'claude-cli:opus-cli', + provider: 'claude-cli', + modelId: 'opus-cli', + label: 'Claude Opus (CLI)', + class: 'frontier', + maxTokens: 32000, + contextWindow: 200000, + vision: false, +}; + +const DEFAULT_MODEL = 'claude-sonnet-4-6'; + +function deps(providerId?: string): { + provider: LlmProvider; + defaultModel: string; +} { + return { + provider: (providerId ? { id: providerId } : {}) as LlmProvider, + defaultModel: DEFAULT_MODEL, + }; +} + +beforeEach(() => { + clearExternalModels(); + registerExternalModels([OPUS, GPT, OPUS_CLI]); +}); + +test('null / empty / whitespace ref → parent default model (inherit)', () => { + assert.equal(resolveSubAgentModel(null, deps('anthropic')), DEFAULT_MODEL); + assert.equal(resolveSubAgentModel(undefined, deps('anthropic')), DEFAULT_MODEL); + assert.equal(resolveSubAgentModel('', deps('anthropic')), DEFAULT_MODEL); + assert.equal(resolveSubAgentModel(' ', deps('anthropic')), DEFAULT_MODEL); +}); + +test('provider-qualified id, alias, and bare id all resolve to the bare modelId', () => { + assert.equal( + resolveSubAgentModel('anthropic:claude-opus-4-8', deps('anthropic')), + 'claude-opus-4-8', + ); + assert.equal(resolveSubAgentModel('opus', deps('anthropic')), 'claude-opus-4-8'); + assert.equal( + resolveSubAgentModel('claude-opus-4-8', deps('anthropic')), + 'claude-opus-4-8', + ); +}); + +test('cross-provider pick → parent default (would 404 on the wrong adapter)', () => { + assert.equal( + resolveSubAgentModel('openai:gpt-5.5', deps('anthropic')), + DEFAULT_MODEL, + 'openai model on an anthropic-bound parent is dropped', + ); +}); + +test('registry-unknown ref → passed through raw (curated registry is not the universe of valid ids; writes are validation-guarded)', () => { + // A non-empty ref the registry does not list is passed through unchanged — + // the API may still serve it (e.g. an undated id). New writes are blocked by + // the `validateModelRef` guard, so this only matters for stale configs. + assert.equal( + resolveSubAgentModel('made-up-model-9', deps('anthropic')), + 'made-up-model-9', + ); +}); + +test('no active provider id → resolves against the anthropic default, no cross-provider drop', () => { + // A deps with no provider id (older boot / stub) still resolves a known ref to + // its bare modelId; the cross-provider guard is simply skipped. + assert.equal(resolveSubAgentModel('opus', deps()), 'claude-opus-4-8'); +}); + +// ── CLI sub-agent path (issue #296 BLOCKER) ────────────────────────────── +// The CLI sub-agent feeds the resolved id through `cliModelAlias` (strips +// `-cli`). The ref must resolve to the bare CLI modelId FIRST, or a +// picker-stored `claude-cli:opus-cli` reaches the strip raw → `claude-cli:opus`, +// an invalid `--model` on every delegated turn — the same failure the main +// model had. + +test('resolveCliSubAgentModel resolves a CLI picker id to the bare modelId (→ strips to opus)', () => { + // opus-cli is what cliModelAlias then strips `-cli` from → `opus`. + assert.equal(resolveCliSubAgentModel('claude-cli:opus-cli', DEFAULT_MODEL), 'opus-cli'); +}); + +test('resolveCliSubAgentModel passes a legacy bare alias / full id through untouched', () => { + // `opus` / `claude-opus-4-8` resolve cross-provider to undefined under + // claude-cli and fall through raw so cliModelAlias handles them as before. + assert.equal(resolveCliSubAgentModel('opus', DEFAULT_MODEL), 'opus'); + assert.equal( + resolveCliSubAgentModel('claude-opus-4-8', DEFAULT_MODEL), + 'claude-opus-4-8', + ); +}); + +test('resolveCliSubAgentModel inherits the parent default for empty / null refs', () => { + assert.equal(resolveCliSubAgentModel(null, DEFAULT_MODEL), DEFAULT_MODEL); + assert.equal(resolveCliSubAgentModel('', DEFAULT_MODEL), DEFAULT_MODEL); + assert.equal(resolveCliSubAgentModel(' ', DEFAULT_MODEL), DEFAULT_MODEL); +}); diff --git a/web-ui/app/_lib/agentBuilder.ts b/web-ui/app/_lib/agentBuilder.ts index 188e9b1a..708a0d37 100644 --- a/web-ui/app/_lib/agentBuilder.ts +++ b/web-ui/app/_lib/agentBuilder.ts @@ -98,6 +98,13 @@ export interface AgentNode { privacyProfile: PrivacyProfile; status: NodeStatus; modelRouting: ModelRoutingConfig | null; + /** + * Resolved orchestrator model the registry currently runs this Agent on + * (per-Agent overlay applied to the platform default). `null` when the + * registry has not yet built the Agent (in-memory bootstrap / Agent + * disabled). Issue #296 acceptance #4. + */ + effectiveModel: string | null; position: CanvasPosition | null; } diff --git a/web-ui/app/_lib/builderTypes.ts b/web-ui/app/_lib/builderTypes.ts index 3910bca7..ab2e3ea2 100644 --- a/web-ui/app/_lib/builderTypes.ts +++ b/web-ui/app/_lib/builderTypes.ts @@ -22,7 +22,13 @@ export interface QualityConfig { export type BuilderModelId = string; export interface BuilderModelInfo { + /** Provider-qualified id (e.g. `anthropic:claude-opus-4-8`) — the value the + * picker writes. */ id: BuilderModelId; + /** Bare vendor model id (e.g. `claude-opus-4-8`). Persisted configs from + * before the provider-qualified picker store this form, so the Inspector + * matches against it to avoid flagging a valid model as "stale". */ + model_id: string; label: string; provider: string; model_class: string; diff --git a/web-ui/app/admin/builder/panels/InspectorPanel.tsx b/web-ui/app/admin/builder/panels/InspectorPanel.tsx index daf7f2d5..3bd876e6 100644 --- a/web-ui/app/admin/builder/panels/InspectorPanel.tsx +++ b/web-ui/app/admin/builder/panels/InspectorPanel.tsx @@ -1,7 +1,7 @@ 'use client'; import { useTranslations } from 'next-intl'; -import { useState } from 'react'; +import { useEffect, useState } from 'react'; import { discoverMcpTools, @@ -16,6 +16,8 @@ import { type SkillNode, type SubAgentNode, } from '../../../_lib/agentBuilder'; +import { listBuilderModels } from '../../../_lib/api'; +import type { BuilderModelInfo } from '../../../_lib/builderTypes'; import { Button } from '@/app/_components/ui/Button'; import type { BuilderNodeData } from '../nodes/types'; import { Field, inputCls, SaveButton } from './InspectorControls'; @@ -118,6 +120,7 @@ function AgentEditor({ const [triage, setTriage] = useState(r?.triage ?? ''); const [simple, setSimple] = useState(r?.simple ?? ''); const { pending, error, run } = useSaver(); + const catalog = useModelCatalog(); async function save(): Promise { await run(async () => { @@ -132,9 +135,24 @@ function AgentEditor({ }); } + // Picker placeholder for `main`: while the catalog is loading we show + // "Loading models…"; afterwards prefer the platform default ("(platform + // default: X)") and fall back to a neutral "(default)" so a loaded catalog + // without a declared default never displays the stale loading copy. + const mainPlaceholder = catalog.loading + ? t('inspector.modelPickerLoading') + : catalog.defaultId + ? t('inspector.platformDefault', { model: catalog.defaultId }) + : t('inspector.modelDefault'); + return (

{agent.name}

+ {agent.effectiveModel && ( +

+ {t('inspector.effectiveModel', { model: agent.effectiveModel })} +

+ )} setMain(e.target.value)} className={inputCls} /> + {mode === 'triage' && ( <> - setTriage(e.target.value)} className={inputCls} /> + - setSimple(e.target.value)} className={inputCls} /> + )} - void save()} pending={pending} label={t('inspector.save')} /> + + void save()} + pending={pending || catalog.loading} + label={t('inspector.save')} + />
); } +interface ModelCatalogState { + models: BuilderModelInfo[]; + defaultId: string | null; + loading: boolean; + error: string | null; +} + +function useModelCatalog(): ModelCatalogState { + const [state, setState] = useState({ + models: [], + defaultId: null, + loading: true, + error: null, + }); + useEffect(() => { + let alive = true; + void (async () => { + try { + const res = await listBuilderModels(); + if (!alive) return; + setState({ + models: res.models, + defaultId: res.default ?? null, + loading: false, + error: null, + }); + } catch (err) { + if (!alive) return; + setState({ + models: [], + defaultId: null, + loading: false, + error: err instanceof Error ? err.message : String(err), + }); + } + })(); + return () => { + alive = false; + }; + }, []); + return state; +} + +function ModelSelect({ + value, + onChange, + catalog, + placeholder, +}: { + value: string; + onChange: (next: string) => void; + catalog: ModelCatalogState; + placeholder: string; +}): React.ReactElement { + // Stale-binding: a persisted value that no installed provider serves anymore. + // Keep it visible (disabled) so the operator sees what is set and can pick a + // replacement instead of having the field silently snap to a different model. + // Match the provider-qualified id, the bare vendor id (pre-picker configs and + // the platform default persist this form), and aliases — only a value that + // resolves to NO registered model is genuinely stale. + const known = catalog.models.some( + (m) => m.id === value || m.model_id === value || m.aliases.includes(value), + ); + const showStale = value !== '' && !known && !catalog.loading; + return ( + + ); +} + // ── Sub-agent ────────────────────────────────────────────────────────────── function SubAgentEditor({ slug, @@ -179,6 +303,10 @@ function SubAgentEditor({ const [model, setModel] = useState(subAgent.model ?? ''); const [prompt, setPrompt] = useState(subAgent.systemPromptOverride ?? ''); const { pending, error, run } = useSaver(); + // Issue #296 follow-up — sub-agent picker uses the same registry-driven + // dropdown as the parent agent so an operator cannot pin a sub-agent to a + // model id no installed provider serves. + const catalog = useModelCatalog(); async function save(): Promise { await run(async () => { @@ -197,7 +325,12 @@ function SubAgentEditor({ setName(e.target.value)} className={inputCls} /> - setModel(e.target.value)} className={inputCls} /> +