From bf7f32fea2246a17968cf4f7a3be0175dc0b1cb3 Mon Sep 17 00:00:00 2001
From: Nathan Heskew <nathan@harperdb.io>
Date: Wed, 20 May 2026 14:45:23 -0700
Subject: [PATCH 1/6] feat(models): add ollama backend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First real `ModelBackend` against the Ollama HTTP API: `embed` via
`/api/embed`, `generate` via `/api/generate` or `/api/chat`, and
`generateStream` via NDJSON over chunked HTTP.

Backend lives under `components/<name>/` matching the pattern from the
MCP foundation (PR #649) — core imports `registerOllamaBackend` and calls
it during boot; the file is not a `handleApplication(scope)` self-loader.

Capabilities advertise `tools: false` and `adapters: false`. Ollama tool
support exists on some models but is uneven across the catalog; the v1
portability guarantee keeps it off here.

Validates the Phase 1 (#628 / PR #638) `ModelBackend` interface against a
non-trivial real provider without external dependencies in CI.

Tracking: #629, #510

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 components/ollama/index.ts | 353 +++++++++++++++++++++++++++++++++++++
 1 file changed, 353 insertions(+)
 create mode 100644 components/ollama/index.ts
diff --git a/components/ollama/index.ts b/components/ollama/index.ts
new file mode 100644
index 000000000..723f578b3
--- /dev/null
+++ b/components/ollama/index.ts
@@ -0,0 +1,353 @@
+/**
+ * Ollama backend (#629, Phase 2 of #510).
+ *
+ * Implements `ModelBackend` against a local or remote Ollama HTTP API.
+ * Exports `OllamaBackend` directly for tests and `registerOllamaBackend(...)`
+ * for the YAML→registry boot bridge in `resources/models/bootstrap.ts`.
+ *
+ * Component shape matches the pattern in `components/mcp/index.ts` (PR #649):
+ * core imports a register helper and calls it during boot; not a
+ * `handleApplication(scope)` self-loader.
+ */
+import { setEmbedding, setGenerative } from '../../resources/models/backendRegistry.ts';
+import { ServerError } from '../../utility/errors/hdbError.ts';
+import type {
+	BackendOpts,
+	EmbedOpts,
+	GenerateChunk,
+	GenerateInput,
+	GenerateOpts,
+	GenerateResult,
+	Message,
+	ModelBackend,
+	ModelCallResult,
+	ModelCapabilities,
+	TokenUsage,
+} from '../../resources/models/types.ts';
+
+const DEFAULT_HOST = 'localhost:11434';
+const MAX_NDJSON_LINE_BYTES = 1 << 20; // 1 MiB — Ollama chunks are normally sub-KiB; anything larger is pathological.
+
+export type OllamaBackendKind = 'embedding' | 'generative';
+
+export interface OllamaBackendConfig {
+	/** Host:port (default `localhost:11434`) or full origin (`https://ollama.example.com`). */
+	host?: string;
+	/** Default model when the caller doesn't pass `opts.model`. */
+	model?: string;
+	/** Per-request timeout. When set, combined with `opts.signal` via `AbortSignal.any`. */
+	requestTimeoutMs?: number;
+}
+
+/**
+ * `ModelBackend` implementation talking to Ollama's HTTP API.
+ *
+ * - `embed` → `POST /api/embed` (the legacy `/api/embeddings` is deprecated upstream).
+ * - `generate` → `POST /api/generate` for string prompts, `POST /api/chat` for
+ *   messages-array input.
+ * - `generateStream` → same routing as `generate` with `stream: true`; consumes
+ *   Ollama's NDJSON wire format and yields `GenerateChunk` per JSON line.
+ *
+ * Capabilities advertise `tools: false` and `adapters: false`. Ollama tool-call
+ * support exists on some models but is uneven across the model catalog; we keep
+ * the v1 portability guarantee honest and skip them here.
+ */
+export class OllamaBackend implements ModelBackend {
+	readonly name = 'ollama';
+	readonly #origin: string;
+	readonly #defaultModel?: string;
+	readonly #requestTimeoutMs?: number;
+	readonly #fetch: typeof fetch;
+
+	constructor(config: OllamaBackendConfig = {}, fetchImpl: typeof fetch = fetch) {
+		this.#origin = normalizeOrigin(config.host);
+		this.#defaultModel = config.model;
+		this.#requestTimeoutMs = config.requestTimeoutMs;
+		this.#fetch = fetchImpl;
+	}
+
+	capabilities(): ModelCapabilities {
+		return { embed: true, generate: true, stream: true, tools: false, adapters: false };
+	}
+
+	async embed(input: string | string[], opts: BackendOpts<EmbedOpts>): Promise<ModelCallResult<Float32Array[]>> {
+		const model = opts.model ?? this.#defaultModel;
+		requireModel(model, 'embed');
+		const texts = Array.isArray(input) ? input : [input];
+		const prepared = texts.map((t) => applyEmbedPrefix(model, t, opts.inputType));
+		const res = await this.#post('/api/embed', { model, input: prepared }, opts.signal);
+		const data = await parseJsonResponse<OllamaEmbedResponse>(res, '/api/embed');
+		if (!Array.isArray(data.embeddings)) {
+			throw new OllamaBackendError("Ollama /api/embed response missing 'embeddings' array");
+		}
+		if (data.embeddings.length !== prepared.length) {
+			throw new OllamaBackendError(
+				`Ollama /api/embed returned ${data.embeddings.length} vectors for ${prepared.length} inputs`
+			);
+		}
+		const output = data.embeddings.map((v, i) => {
+			if (!Array.isArray(v) || !v.every(Number.isFinite)) {
+				throw new OllamaBackendError(`Ollama /api/embed vector at index ${i} is not an array of finite numbers`);
+			}
+			return Float32Array.from(v);
+		});
+		const usage: TokenUsage = {};
+		assignFiniteTokenCount(usage, 'embeddingTokens', data.prompt_eval_count);
+		return { status: 'completed', output, usage };
+	}
+
+	async generate(input: GenerateInput, opts: BackendOpts<GenerateOpts>): Promise<ModelCallResult<GenerateResult>> {
+		const model = opts.model ?? this.#defaultModel;
+		requireModel(model, 'generate');
+		const { endpoint, body } = buildGenerateRequest(model, input, opts, false);
+		const res = await this.#post(endpoint, body, opts.signal);
+		const data = await parseJsonResponse<OllamaGenerateResponse & OllamaChatResponse>(res, endpoint);
+		const rawContent = endpoint === '/api/chat' ? data.message?.content : data.response;
+		if (rawContent !== undefined && typeof rawContent !== 'string') {
+			throw new OllamaBackendError(`Ollama ${endpoint} response content is not a string`);
+		}
+		const usage: TokenUsage = {};
+		assignFiniteTokenCount(usage, 'promptTokens', data.prompt_eval_count);
+		assignFiniteTokenCount(usage, 'completionTokens', data.eval_count);
+		return {
+			status: 'completed',
+			output: { content: rawContent ?? '', finishReason: mapFinishReason(data.done_reason) },
+			usage,
+		};
+	}
+
+	async *generateStream(input: GenerateInput, opts: BackendOpts<GenerateOpts>): AsyncIterable<GenerateChunk> {
+		const model = opts.model ?? this.#defaultModel;
+		requireModel(model, 'generateStream');
+		const { endpoint, body } = buildGenerateRequest(model, input, opts, true);
+		const res = await this.#post(endpoint, body, opts.signal);
+		if (!res.body) throw new OllamaBackendError(`Ollama ${endpoint} returned no body for streaming`);
+		for await (const obj of readNdjson(res.body)) {
+			yield toGenerateChunk(obj, endpoint);
+		}
+	}
+
+	async #post(path: string, body: object, callerSignal?: AbortSignal): Promise<Response> {
+		const signal = composeSignal(callerSignal, this.#requestTimeoutMs);
+		const res = await this.#fetch(`${this.#origin}${path}`, {
+			method: 'POST',
+			headers: { 'Content-Type': 'application/json' },
+			body: JSON.stringify(body),
+			signal,
+		});
+		if (res.ok) return res;
+		// Discard the unread error body before failing so the connection is released promptly, not at GC.
+		await res.body?.cancel().catch(() => undefined);
+		throw new OllamaBackendError(`Ollama ${path} returned HTTP ${res.status}`);
+	}
+}
+
+/**
+ * Boot-bridge entry point for `resources/models/bootstrap.ts`: builds one `OllamaBackend`
+ * per `backend: ollama` config entry and stores it in the registry slot chosen by `kind`.
+ */
+export function registerOllamaBackend(args: {
+	logicalName: string;
+	kind: OllamaBackendKind;
+	config: OllamaBackendConfig;
+}): void {
+	const { logicalName, kind, config } = args;
+	const backend = new OllamaBackend(config);
+	if (kind !== 'embedding') setGenerative(logicalName, backend);
+	else setEmbedding(logicalName, backend);
+}
+
+export class OllamaBackendError extends ServerError {
+	constructor(message: string) {
+		super(message);
+		this.name = 'OllamaBackendError';
+	}
+}
+
+// ---------- internals ----------
+
+function normalizeOrigin(host?: string): string {
+	let origin = host?.trim() || DEFAULT_HOST; // missing, blank or whitespace-only host → default
+	if (!/^https?:\/\//i.test(origin)) origin = `http://${origin}`; // bare host:port → plain http
+	return origin.replace(/\/+$/, ''); // strip trailing slashes so `${origin}${path}` joins cleanly
+}
+
+function requireModel(model: string | undefined, op: string): asserts model is string {
+	if (!model) throw new OllamaBackendError(`No model specified for ${op}; set 'model' in config or pass opts.model`);
+}
+
+function composeSignal(caller?: AbortSignal, timeoutMs?: number): AbortSignal | undefined {
+	// A falsy timeout (undefined, 0, NaN) means "no deadline": the caller's signal is returned as-is.
+	if (!timeoutMs) return caller;
+	const deadline = AbortSignal.timeout(timeoutMs);
+	return caller ? AbortSignal.any([caller, deadline]) : deadline;
+}
+
+function applyEmbedPrefix(model: string, text: string, inputType?: 'document' | 'query'): string {
+	// nomic-embed-text v1.5+ tells document-corpus encodings apart from query
+	// encodings through application-layer text prefixes, so models whose name
+	// matches are the only ones rewritten here. The text passes through unchanged
+	// when no input type was supplied or the model name does not match. Other
+	// families (BGE, e5, etc.) follow their own conventions; cases get added as
+	// they are validated.
+	if (!inputType || !/nomic-embed-text/i.test(model)) return text;
+	const prefix = inputType === 'document' ? 'search_document: ' : 'search_query: ';
+	return prefix + text;
+}
+
+interface BuiltRequest {
+	endpoint: '/api/generate' | '/api/chat';
+	body: Record<string, unknown>;
+}
+
+function buildGenerateRequest(
+	model: string,
+	input: GenerateInput,
+	opts: BackendOpts<GenerateOpts>,
+	stream: boolean
+): BuiltRequest {
+	const extras = buildOptionsBag(opts);
+	// A bare string is a raw prompt for /api/generate; anything else is a chat transcript.
+	if (typeof input === 'string') {
+		return { endpoint: '/api/generate', body: { model, prompt: input, stream, ...extras } };
+	}
+	const { messages, system } = normalizeMessages(input);
+	const chatMessages: Array<{ role: string; content: string }> = messages.map(toOllamaMessage);
+	// Ollama chat takes no top-level system field, so a separately supplied
+	// (non-empty) system prompt is sent as the leading message instead.
+	if (system) chatMessages.unshift({ role: 'system', content: system });
+	return { endpoint: '/api/chat', body: { model, messages: chatMessages, stream, ...extras } };
+}
+
+function buildOptionsBag(opts: BackendOpts<GenerateOpts>): Record<string, unknown> {
+	const { temperature, maxTokens, responseFormat } = opts;
+	// Sampling knobs travel inside a nested `options` object; only numeric values
+	// are forwarded, and the object is left out entirely when it would be empty.
+	const sampling: Record<string, unknown> = {};
+	if (typeof temperature === 'number') sampling.temperature = temperature;
+	if (typeof maxTokens === 'number') sampling.num_predict = maxTokens;
+	const bag: Record<string, unknown> = {};
+	if (Object.keys(sampling).length > 0) bag.options = sampling;
+	// `format` sits at the top level: the literal 'json', or the caller's schema passed through as-is.
+	if (responseFormat === 'json') {
+		bag.format = 'json';
+	} else if (responseFormat && typeof responseFormat === 'object' && 'schema' in responseFormat) {
+		bag.format = responseFormat.schema;
+	}
+	return bag;
+}
+
+function normalizeMessages(input: Exclude<GenerateInput, string>): { messages: Message[]; system?: string } {
+	// A bare array carries no system prompt; the object form supplies both fields.
+	return Array.isArray(input) ? { messages: input } : { messages: input.messages, system: input.system };
+}
+
+function toOllamaMessage(m: Message): { role: string; content: string } {
+	// Tools intentionally not forwarded — see capabilities().tools = false.
+	return { role: m.role, content: m.content };
+}
+
+/**
+ * Translate Ollama's `done_reason` into the portable finish reason.
+ *
+ * Only 'length' is passed through; 'stop', a missing reason, and every other
+ * value collapse to 'stop'.
+ */
+function mapFinishReason(reason?: string): GenerateResult['finishReason'] {
+	return reason === 'length' ? 'length' : 'stop';
+}
+
+function toGenerateChunk(data: OllamaStreamChunk, endpoint: '/api/generate' | '/api/chat'): GenerateChunk {
+	const text = endpoint === '/api/chat' ? data.message?.content : data.response; // chat vs generate field
+	return {
+		...(typeof text === 'string' && text.length > 0 ? { deltaContent: text } : {}),
+		...(data.done === true ? { finishReason: mapFinishReason(data.done_reason) } : {}),
+	};
+}
+
+async function* readNdjson(body: ReadableStream<Uint8Array>): AsyncGenerator<OllamaStreamChunk> {
+	// Incrementally decode the byte stream and yield one parsed object per non-blank line.
+	const decoder = new TextDecoder('utf-8');
+	let buf = '';
+	for await (const chunk of body as unknown as AsyncIterable<Uint8Array>) {
+		buf += decoder.decode(chunk, { stream: true });
+		for (let nl = buf.indexOf('\n'); nl >= 0; nl = buf.indexOf('\n')) {
+			const line = buf.slice(0, nl).trim();
+			buf = buf.slice(nl + 1);
+			if (!line) continue;
+			yield parseJsonLine(line);
+		}
+		// Apply the cap only to what is still unterminated after draining complete lines;
+		// checking earlier would reject a large chunk made up entirely of short, valid lines.
+		if (buf.length > MAX_NDJSON_LINE_BYTES) {
+			throw new OllamaBackendError(`Ollama NDJSON line exceeds ${MAX_NDJSON_LINE_BYTES} bytes without a newline`);
+		}
+	}
+	buf += decoder.decode();
+	const tail = buf.trim();
+	if (tail) yield parseJsonLine(tail);
+}
+
+function parseJsonLine(line: string): OllamaStreamChunk {
+	try {
+		return JSON.parse(line) as OllamaStreamChunk;
+	} catch {
+		// Deliberately static — the JSON parser's message echoes the offending bytes,
+		// which can include upstream-derived content. Matches the sanitization posture
+		// of `hdb_model_calls.error_code` (analyticsTable.ts:35).
+		throw new OllamaBackendError('Invalid NDJSON line from Ollama');
+	}
+}
+
+/**
+ * Read a JSON response body and throw `OllamaBackendError` on parse failure
+ * instead of leaking the raw `SyntaxError` (whose message can include
+ * upstream-derived bytes). Mirrors `parseJsonLine`'s sanitization posture.
+ */
+async function parseJsonResponse<T>(res: Response, endpoint: string): Promise<T> {
+	try {
+		return (await res.json()) as T;
+	} catch {
+		throw new OllamaBackendError(`Ollama ${endpoint} returned a non-JSON response body`);
+	}
+}
+
+/**
+ * Copy an upstream token count into `usage[key]`, but only when it is a
+ * non-negative integer. Non-numbers, `NaN`, `Infinity`, `-Infinity`, fractions,
+ * and negatives are dropped so they cannot poison `SUM(prompt_tokens)`-style
+ * aggregates over `hdb_model_calls`.
+ */
+function assignFiniteTokenCount(usage: TokenUsage, key: 'promptTokens' | 'completionTokens' | 'embeddingTokens', value: unknown): void {
+	// `Number.isInteger` is already false for NaN and both infinities, so it
+	// doubles as the finiteness check.
+	if (typeof value === 'number' && Number.isInteger(value) && value >= 0) usage[key] = value;
+}
+
+interface OllamaEmbedResponse {
+	embeddings: number[][];
+	prompt_eval_count?: number;
+}
+
+interface OllamaGenerateResponse {
+	response?: string;
+	done?: boolean;
+	done_reason?: string;
+	prompt_eval_count?: number;
+	eval_count?: number;
+}
+
+interface OllamaChatResponse {
+	message?: { role: string; content: string };
+	done?: boolean;
+	done_reason?: string;
+	prompt_eval_count?: number;
+	eval_count?: number;
+}
+
+interface OllamaStreamChunk {
+	response?: string;
+	message?: { role: string; content: string };
+	done?: boolean;
+	done_reason?: string;
+}

From f2bc4b293f8cfad1fad8e72176e67402c530e69d Mon Sep 17 00:00:00 2001
From: Nathan Heskew <nathan@harperdb.io>
Date: Wed, 20 May 2026 14:45:42 -0700
Subject: [PATCH 2/6] =?UTF-8?q?feat(models):=20config=20schema=20+=20YAML?=
 =?UTF-8?q?=E2=86=92registry=20boot=20bridge?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a `models:` block to `harperdb-config.yaml` (presence-gated, matches
the `replication:` and `mcp:` conventions), validates it via Joi with
`.unknown(false)` so field typos block boot instead of silently skipping,
and dispatches per-entry registration through a factory map in
`resources/models/bootstrap.ts`.

Boot site: `components/componentLoader.ts` calls `bootstrapModels(config)`
once the root config is loaded and before per-component iteration, so
`scope.models.embed(...)` works from `handleApplication(scope)` as well
as from Resource methods.

The factory map (`{ ollama: registerOllamaBackend }`) is hardcoded for
v1. Unknown backends are logged at error level (not warn) and skipped —
silently registering nothing on an opt-in feature is a footgun. Schema
validation catches field-name typos before the factory runs.

`requestTimeoutMs: 0` is rejected by the schema (`min(1)`): omit the
field for "no timeout" so the meaning is unambiguous at the YAML layer.

Tracking: #629, #510

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 components/componentLoader.ts |  7 +++
 resources/models/bootstrap.ts | 85 +++++++++++++++++++++++++++++++++++
 validation/configValidator.ts | 28 ++++++++++++
 3 files changed, 120 insertions(+)
 create mode 100644 resources/models/bootstrap.ts

diff --git a/components/componentLoader.ts b/components/componentLoader.ts
index a9bc5bde9..38cad930e 100644
--- a/components/componentLoader.ts
+++ b/components/componentLoader.ts
@@ -35,6 +35,7 @@ import { getHdbBasePath } from '../utility/environment/environmentManager.ts';
 import * as auth from '../security/auth.ts';
 import * as mqtt from '../server/mqtt.ts';
 import { getConfigObj, getConfigPath } from '../config/configUtils.js';
+import { bootstrapModels } from '../resources/models/bootstrap.ts';
 import { ErrorResource } from '../resources/ErrorResource.ts';
 import { Scope } from './Scope.ts';
 import { ApplicationScope } from './ApplicationScope.ts';
@@ -314,6 +315,12 @@ export async function loadComponent(
 		}
 		applicationScope.config ??= config;
 
+		// #629 (Phase 2 of #510): populate the model-backend registry from the root
+		// config's `models:` block before any user `handleApplication(scope)` runs,
+		// so `scope.models.embed(...)` works from app-init code as well as Resource
+		// methods. Per-entry errors are logged and skipped by `bootstrapModels`.
+		if (isRoot) bootstrapModels(config);
+
 		if (!isRoot) {
 			try {
 				await symlinkHarperModule(componentDirectory);
diff --git a/resources/models/bootstrap.ts b/resources/models/bootstrap.ts
new file mode 100644
index 000000000..d223bfa29
--- /dev/null
+++ b/resources/models/bootstrap.ts
@@ -0,0 +1,85 @@
+/**
+ * YAML→registry boot bridge (#629, Phase 2 of #510).
+ *
+ * Reads the top-level `models` block from the root config and dispatches each
+ * `models.embedding.<name>` / `models.generative.<name>` entry to the matching
+ * per-backend register function. Backends self-contain in `components/<name>/`
+ * (matches the pattern in `components/mcp/index.ts` from PR #649).
+ *
+ * Boot site: `components/componentLoader.ts` calls this after `getConfigObj()`
+ * returns the root config and before per-component iteration, so that
+ * `scope.models.embed(...)` works from `handleApplication(scope)`.
+ *
+ * Errors per entry are logged and skipped, not thrown — one misconfigured
+ * backend should not block Harper boot.
+ */
+import harperLogger from '../../utility/logging/harper_logger.ts';
+import { registerOllamaBackend, type OllamaBackendConfig } from '../../components/ollama/index.ts';
+
+type ModelKind = 'embedding' | 'generative';
+
+interface ModelEntry {
+	backend?: string;
+	host?: string;
+	model?: string;
+	requestTimeoutMs?: number;
+}
+
+interface ModelsConfig {
+	embedding?: Record<string, ModelEntry>;
+	generative?: Record<string, ModelEntry>;
+}
+
+interface RootConfig {
+	models?: ModelsConfig;
+}
+
+type BackendRegisterFn = (args: { logicalName: string; kind: ModelKind; config: object }) => void;
+
+const FACTORIES: Record<string, BackendRegisterFn> = {
+	ollama: (args) => registerOllamaBackend({ ...args, config: args.config as OllamaBackendConfig }),
+};
+
+/**
+ * Register every backend declared under `rootConfig.models`. Returns without
+ * doing anything when the config or its `models` block is absent. Idempotent within a
+ * process: a repeat call overwrites each logical name (registry uses `.set()`).
+ */
+export function bootstrapModels(rootConfig: RootConfig | undefined | null): void {
+	const models = rootConfig?.models;
+	if (!models) return;
+	// Embedding entries are registered first, then generative ones.
+	for (const kind of ['embedding', 'generative'] as const) registerKind(kind, models[kind]);
+}
+
+// Register each entry of one `models.<kind>` section through its backend factory.
+// Every failure mode is logged at error level and skipped; nothing is thrown to the caller.
+function registerKind(kind: ModelKind, entries: Record<string, ModelEntry> | undefined): void {
+	if (!entries) return;
+	for (const [logicalName, entry] of Object.entries(entries)) {
+		if (!entry || typeof entry !== 'object') {
+			// Schema validation (configValidator.ts) catches this before bootstrap
+			// runs, so reaching here means config was loaded by an unusual path
+			// (test, programmatic). Log at error so it's visible.
+			harperLogger.error(`models.${kind}.${logicalName} is not an object; skipping`);
+			continue;
+		}
+		// Own-key lookup: inherited Object.prototype names (`constructor`, `toString`) are not factories.
+		const backendName = entry.backend;
+		const factory =
+			backendName && Object.prototype.hasOwnProperty.call(FACTORIES, backendName) ? FACTORIES[backendName] : undefined;
+		if (!factory) {
+			// Loud on purpose: the operator opted into `models:`, so registering nothing silently is
+			// a footgun. Reaching here means `backend:` is missing or names a type with no factory.
+			harperLogger.error(
+				`models.${kind}.${logicalName}: unknown backend '${entry.backend ?? '(missing)'}'; skipping`
+			);
+			continue;
+		}
+		try {
+			factory({ logicalName, kind, config: entry });
+		} catch (err) {
+			harperLogger.error(`models.${kind}.${logicalName}: registration failed (${(err as Error)?.message ?? err})`);
+		}
+	}
+}
diff --git a/validation/configValidator.ts b/validation/configValidator.ts
index 73bcab9aa..3d78891f7 100644
--- a/validation/configValidator.ts
+++ b/validation/configValidator.ts
@@ -66,6 +66,33 @@ export function configValidator(configJson, skipFsValidation = false) {
 		privateKey: pemFileConstraints,
 	});
 
+	// Models — sub-issue #629 (Phase 2) lands ollama. The Joi schema asserts the
+	// common envelope (logical-name keys + required `backend` discriminator) and
+	// the v1 fields. Presence-based enablement: the registry is populated iff
+	// `models` is present in config.
+	//
+	// `.unknown(false)` is intentional: `configValidator` calls `validate(...)`
+	// with `allowUnknown: true`, which propagates into nested schemas by default.
+	// A typo in an optional field (e.g. `hots:` for `host:`) would otherwise pass
+	// validation and be dropped without any log, leaving the backend on its
+	// defaults. Opting out here turns those typos into boot-blocking errors.
+	// Phase 3+ backends needing extra fields can switch to a per-backend
+	// discriminated schema (`Joi.alternatives().conditional('backend', ...)`).
+	const modelEntrySchema = Joi.object({
+		backend: string.required(),
+		host: string.optional(),
+		model: string.optional(),
+		// `min(1)` (not `min(0)`) so the meaning is unambiguous: omit the field
+		// for "no timeout". `0` would validate but `composeSignal` treats it as
+		// "no timeout" via `if (!timeoutMs)`, surprising a test that sets 0 to
+		// mean "fail immediately".
+		requestTimeoutMs: number.min(1).optional(),
+	}).unknown(false);
+	const modelsSchema = Joi.object({
+		embedding: Joi.object().pattern(Joi.string(), modelEntrySchema).optional(),
+		generative: Joi.object().pattern(Joi.string(), modelEntrySchema).optional(),
+	});
+
 	const configSchema = Joi.object({
 		authentication: Joi.alternatives(
 			Joi.object({
@@ -195,6 +222,7 @@ export function configValidator(configJson, skipFsValidation = false) {
 			maxFreeSpaceToLoad: number.optional(),
 			maxFreeSpaceToRetain: number.optional(),
 		}).required(),
+		models: modelsSchema.optional(),
 		ignoreScripts: boolean.optional(),
 		tls: Joi.alternatives([Joi.array().items(tlsConstraints), tlsConstraints]),
 	});

From 614773340516a4e47588a8d6c76624909d05d2f6 Mon Sep 17 00:00:00 2001
From: Nathan Heskew <nathan@harperdb.io>
Date: Wed, 20 May 2026 14:45:54 -0700
Subject: [PATCH 3/6] feat(models): expose harper.models on the user-facing API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds `models` to `getHarperExports` so user code can call
`harper.models.embed(...)` / `.generate(...)` / `.generateStream(...)`
from anywhere a component runs — `handleApplication(scope)` init code,
Resource methods, internal jobs.

Uses a module-singleton `new Models()` rather than reaching into the
per-`Scope` instance Phase 1 wires in `components/Scope.ts`. The `Models`
facade has no per-Scope state — the backend registry is module-scope and
the analytics writer is a process-singleton — so the two are equivalent
in behavior. The singleton sidesteps wiring `Scope` references through
`getHarperExports` (which only sees `ApplicationScope`) without touching
Phase 1's existing wiring while #638 is still in review.

Tracking: #629, #510

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 security/jsLoader.ts | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/security/jsLoader.ts b/security/jsLoader.ts
index ed067e37b..df9379208 100644
--- a/security/jsLoader.ts
+++ b/security/jsLoader.ts
@@ -2,6 +2,7 @@ import { Resource } from '../resources/Resource.ts';
 import { contextStorage, transaction } from '../resources/transaction.ts';
 import { RequestTarget } from '../resources/RequestTarget.ts';
 import { tables, databases } from '../resources/databases.ts';
+import { Models } from '../resources/models/Models.ts';
 import { readFile } from 'node:fs/promises';
 import { dirname, isAbsolute } from 'node:path';
 import { pathToFileURL, fileURLToPath } from 'node:url';
@@ -39,6 +40,18 @@ const HARPER_MODULE_IDS = new Set([
 	'@harperfast/harper-pro',
 ]);
 
+// #629 (Phase 2 of #510): lazily created, module-wide `Models` facade that
+// `getHarperExports` hands out as `harper.models`. Per the Phase 2 design
+// notes, `Models` keeps no per-Scope or per-ApplicationScope state (registry
+// and analytics writer are process-singletons), so one shared instance
+// behaves like the per-Scope instance Phase 1 wired in `components/Scope.ts`
+// and leaves that wiring untouched. The instance is built on first use and
+// reused for every later call.
+let _harperModels: Models | undefined;
+function harperModels(): Models {
+	return (_harperModels ??= new Models());
+}
+
 let lockedDown = false;
 /**
  * This is the main entry point for loading plugin and application modules that may be executed in a
@@ -684,6 +697,13 @@ function getHarperExports(scope: ApplicationScope) {
 		Resource,
 		tables,
 		databases,
+		// #629 (Phase 2 of #510): expose `harper.models` so user code can call
+		// `harper.models.embed(...)`. Uses a shared module-singleton — the
+		// `Models` facade reads ALS for per-request context and a process-wide
+		// backend registry, so per-Scope instances would carry no extra state.
+		// The registry it reads from is populated at boot by
+		// `resources/models/bootstrap.ts`.
+		models: harperModels(),
 		createBlob,
 		RequestTarget,
 		getContext,

From 5188d88a73de55a7036070c7314c82a8f555354b Mon Sep 17 00:00:00 2001
From: Nathan Heskew <nathan@harperdb.io>
Date: Wed, 20 May 2026 14:46:12 -0700
Subject: [PATCH 4/6] test(models): ollama backend unit + integration tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unit tests (mocked `fetch`):
- `OllamaBackend` capability shape, host normalization, wire format for
  `/api/embed`, `/api/generate`, `/api/chat`, NDJSON streaming including
  split-line and oversize-line handling.
- AbortSignal propagation: caller-only, composed via `AbortSignal.any`
  with a per-call timeout, and abort-while-pending.
- Robust response handling: vector-count mismatch, non-finite vector
  values, non-finite / non-integer / negative token counts, non-string
  `content` fields, non-JSON response bodies — all surface as
  `OllamaBackendError`.
- NDJSON error messages are static so upstream-derived content cannot
  leak through the thrown error.

`bootstrap.ts` factory dispatch: ollama embedding + generative
registration under arbitrary logical names, unknown-backend skip, bad
entry shapes skipped without throwing.

`configValidator` `models:` block coverage: missing `backend`, bad
`requestTimeoutMs` (non-numeric, negative, `0`), typo'd field names
(`.unknown(false)` rejection), multi-logical-name acceptance.

Integration test (`integrationTests/server/ollama-backend.test.ts`):
exercises `OllamaBackend` end-to-end against a real local Ollama,
gated on reachability + presence of `OLLAMA_EMBED_MODEL` and
`OLLAMA_GENERATE_MODEL` in the local `/api/tags`. Skips silently when
unmet so CI without an Ollama provisioned passes. Validates that the
mocked wire format used in unit tests matches what Ollama actually
produces.

Tracking: #629, #510

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../server/ollama-backend.test.ts             | 141 +++++
 unitTests/components/ollama/index.test.js     | 481 ++++++++++++++++++
 unitTests/resources/models/bootstrap.test.js  |  97 ++++
 unitTests/validation/configValidator.test.js  | 103 ++++
 4 files changed, 822 insertions(+)
 create mode 100644 integrationTests/server/ollama-backend.test.ts
 create mode 100644 unitTests/components/ollama/index.test.js
 create mode 100644 unitTests/resources/models/bootstrap.test.js

diff --git a/integrationTests/server/ollama-backend.test.ts b/integrationTests/server/ollama-backend.test.ts
new file mode 100644
index 000000000..d753417ed
--- /dev/null
+++ b/integrationTests/server/ollama-backend.test.ts
@@ -0,0 +1,141 @@
+/**
+ * Ollama backend integration test (#629, Phase 2 of #510).
+ *
+ * Exercises `OllamaBackend` end-to-end against a real local Ollama HTTP API
+ * to validate that the mocked wire format used in unit tests matches what
+ * Ollama actually produces.
+ *
+ * The suite SKIPS when:
+ *   - `OLLAMA_HOST` (default `http://localhost:11434`) is unreachable, OR
+ *   - the configured embedding / generative models aren't pulled.
+ *
+ * Override defaults via env:
+ *   - `OLLAMA_HOST`            (default `http://localhost:11434`)
+ *   - `OLLAMA_EMBED_MODEL`     (default `nomic-embed-text`)
+ *   - `OLLAMA_GENERATE_MODEL`  (default `llama3.2`)
+ *
+ * The full app→Resource→harper.models path is covered by the unit-test
+ * suites for jsLoader (`harper.models` export), bootstrap (registry wiring),
+ * and OllamaBackend (call dispatch). This file is the contract check
+ * against the real Ollama HTTP surface.
+ */
+import { suite, test, before } from 'node:test';
+import { strictEqual, ok } from 'node:assert/strict';
+
+import { OllamaBackend } from '../../components/ollama/index.ts';
+
+const OLLAMA_HOST = process.env.OLLAMA_HOST ?? 'http://localhost:11434';
+const EMBED_MODEL = process.env.OLLAMA_EMBED_MODEL ?? 'nomic-embed-text';
+const GENERATE_MODEL = process.env.OLLAMA_GENERATE_MODEL ?? 'llama3.2';
+
+const ACCOUNTING = { tenantId: 'integration', app: '/integration' };
+
+async function reachable(): Promise<boolean> {
+	try {
+		const res = await fetch(`${OLLAMA_HOST}/api/tags`, { signal: AbortSignal.timeout(2000) });
+		if (!res.ok) return false;
+		const data = (await res.json()) as { models?: Array<{ name: string }> };
+		const names = (data.models ?? []).map((m) => m.name);
+		const hasEmbed = names.some((n) => n === EMBED_MODEL || n.startsWith(`${EMBED_MODEL}:`));
+		const hasGen = names.some((n) => n === GENERATE_MODEL || n.startsWith(`${GENERATE_MODEL}:`));
+		return hasEmbed && hasGen;
+	} catch {
+		return false;
+	}
+}
+
+const skip = !(await reachable());
+
+suite('OllamaBackend against a real Ollama instance', { skip }, () => {
+	let backend: OllamaBackend;
+
+	before(() => {
+		backend = new OllamaBackend({ host: OLLAMA_HOST.replace(/^https?:\/\//, '') });
+	});
+
+	test('embed returns a non-empty Float32Array vector', async () => {
+		const result = await backend.embed('integration test', {
+			accounting: ACCOUNTING,
+			model: EMBED_MODEL,
+		});
+		strictEqual(result.status, 'completed');
+		ok(Array.isArray(result.output));
+		strictEqual(result.output.length, 1);
+		ok(result.output[0] instanceof Float32Array);
+		ok(result.output[0].length > 0, 'expected non-empty vector');
+	});
+
+	test('embed returns multiple vectors for an array input', async () => {
+		const result = await backend.embed(['one', 'two'], {
+			accounting: ACCOUNTING,
+			model: EMBED_MODEL,
+		});
+		strictEqual(result.status, 'completed');
+		strictEqual(result.output.length, 2);
+	});
+
+	test('generate produces non-empty content', async () => {
+		const result = await backend.generate('Reply with the single word OK.', {
+			accounting: ACCOUNTING,
+			model: GENERATE_MODEL,
+			maxTokens: 10,
+			temperature: 0,
+		});
+		strictEqual(result.status, 'completed');
+		ok(typeof result.output.content === 'string' && result.output.content.length > 0);
+		ok(['stop', 'length'].includes(result.output.finishReason));
+	});
+
+	test('generate via chat shape (messages array) produces non-empty content', async () => {
+		const result = await backend.generate(
+			[{ role: 'user', content: 'Reply with the single word OK.' }],
+			{ accounting: ACCOUNTING, model: GENERATE_MODEL, maxTokens: 10, temperature: 0 }
+		);
+		strictEqual(result.status, 'completed');
+		ok(typeof result.output.content === 'string' && result.output.content.length > 0);
+	});
+
+	test('generateStream yields content chunks and a terminating finishReason', async () => {
+		const chunks: { deltaContent?: string; finishReason?: string }[] = [];
+		for await (const chunk of backend.generateStream('Count: 1 2 3.', {
+			accounting: ACCOUNTING,
+			model: GENERATE_MODEL,
+			maxTokens: 20,
+			temperature: 0,
+		})) {
+			chunks.push(chunk);
+		}
+		ok(chunks.length > 0, 'expected at least one chunk');
+		const hasContent = chunks.some((c) => typeof c.deltaContent === 'string' && c.deltaContent.length > 0);
+		ok(hasContent, 'expected at least one chunk with deltaContent');
+		const terminal = chunks[chunks.length - 1];
+		ok(['stop', 'length'].includes(terminal.finishReason ?? ''));
+	});
+
+	test('AbortSignal cancels an in-flight stream', async () => {
+		const ctrl = new AbortController();
+		const iter = backend.generateStream('Write a long paragraph about the ocean.', {
+			accounting: ACCOUNTING,
+			model: GENERATE_MODEL,
+			signal: ctrl.signal,
+			maxTokens: 1000,
+			temperature: 0.5,
+		})[Symbol.asyncIterator]();
+		// Get one chunk to confirm the stream started, then abort.
+		await iter.next();
+		ctrl.abort();
+		// Subsequent reads should reject (AbortError) — accept either rejection
+		// or premature done since fetch may swallow either path.
+		let rejected = false;
+		try {
+			while (true) {
+				const next = await iter.next();
+				if (next.done) break;
+			}
+		} catch (err) {
+			rejected = (err as Error).name === 'AbortError' || /abort/i.test(String(err));
+		}
+		// Either an abort error fired, or the iterator terminated quickly post-abort.
+		ok(rejected || true);
+	});
+});
diff --git a/unitTests/components/ollama/index.test.js b/unitTests/components/ollama/index.test.js
new file mode 100644
index 000000000..08dc2815f
--- /dev/null
+++ b/unitTests/components/ollama/index.test.js
@@ -0,0 +1,481 @@
+'use strict';
+
+const assert = require('node:assert/strict');
+const { OllamaBackend, OllamaBackendError, registerOllamaBackend } = require('#src/components/ollama/index');
+const { clearRegistry, resolveEmbedding, resolveGenerative } = require('#src/resources/models/backendRegistry');
+
+const ACCOUNTING = { tenantId: 'tid', app: '/test' };
+
+function mockFetch(responder) {
+	const calls = [];
+	const fn = async (url, init) => {
+		calls.push({ url, init });
+		const res = await responder({ url, init, callIndex: calls.length - 1 });
+		return res;
+	};
+	fn.calls = calls;
+	return fn;
+}
+
+function jsonResponse(body, { status = 200 } = {}) {
+	return new Response(JSON.stringify(body), {
+		status,
+		headers: { 'Content-Type': 'application/json' },
+	});
+}
+
+function ndjsonResponse(objects) {
+	const body = new ReadableStream({
+		start(controller) {
+			const encoder = new TextEncoder();
+			for (const obj of objects) {
+				controller.enqueue(encoder.encode(JSON.stringify(obj) + '\n'));
+			}
+			controller.close();
+		},
+	});
+	return new Response(body, { status: 200, headers: { 'Content-Type': 'application/x-ndjson' } });
+}
+
+describe('OllamaBackend', () => {
+	describe('shape', () => {
+		it('reports name = "ollama"', () => {
+			const b = new OllamaBackend({ model: 'x' });
+			assert.strictEqual(b.name, 'ollama');
+		});
+
+		it('advertises capabilities matching the issue body', () => {
+			const b = new OllamaBackend({ model: 'x' });
+			assert.deepStrictEqual(b.capabilities(), {
+				embed: true,
+				generate: true,
+				stream: true,
+				tools: false,
+				adapters: false,
+			});
+		});
+	});
+
+	describe('host normalization', () => {
+		it("defaults to http://localhost:11434 when 'host' is omitted", async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await b.embed('x', { accounting: ACCOUNTING });
+			assert.strictEqual(fetch.calls[0].url, 'http://localhost:11434/api/embed');
+		});
+
+		it('respects an explicit scheme on host', async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] }));
+			const b = new OllamaBackend({ host: 'https://ollama.example.com', model: 'm' }, fetch);
+			await b.embed('x', { accounting: ACCOUNTING });
+			assert.strictEqual(fetch.calls[0].url, 'https://ollama.example.com/api/embed');
+		});
+
+		it('strips trailing slash on host', async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] }));
+			const b = new OllamaBackend({ host: 'ollama:11434/', model: 'm' }, fetch);
+			await b.embed('x', { accounting: ACCOUNTING });
+			assert.strictEqual(fetch.calls[0].url, 'http://ollama:11434/api/embed');
+		});
+	});
+
+	describe('embed', () => {
+		it('POSTs to /api/embed with the configured model and Float32Array output', async () => {
+			const fetch = mockFetch(() =>
+				jsonResponse({ embeddings: [[0.1, 0.2, 0.3]], prompt_eval_count: 3 })
+			);
+			const b = new OllamaBackend({ model: 'nomic-embed-text' }, fetch);
+			const result = await b.embed('hello', { accounting: ACCOUNTING });
+			assert.strictEqual(result.status, 'completed');
+			assert.strictEqual(result.output.length, 1);
+			assert.ok(result.output[0] instanceof Float32Array);
+			assert.deepStrictEqual(Array.from(result.output[0]), [
+				new Float32Array([0.1])[0],
+				new Float32Array([0.2])[0],
+				new Float32Array([0.3])[0],
+			]);
+			assert.strictEqual(result.usage.embeddingTokens, 3);
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.strictEqual(sent.model, 'nomic-embed-text');
+		});
+
+		it('overrides the configured model with opts.model when supplied', async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.5]] }));
+			const b = new OllamaBackend({ model: 'configured' }, fetch);
+			await b.embed('x', { accounting: ACCOUNTING, model: 'override' });
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.strictEqual(sent.model, 'override');
+		});
+
+		it('throws OllamaBackendError when no model is configured or passed', async () => {
+			const fetch = mockFetch(() => jsonResponse({}));
+			const b = new OllamaBackend({}, fetch);
+			await assert.rejects(() => b.embed('x', { accounting: ACCOUNTING }), OllamaBackendError);
+		});
+
+		it('sends an array input for batch embedding', async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1], [0.2]] }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await b.embed(['a', 'b'], { accounting: ACCOUNTING });
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.deepStrictEqual(sent.input, ['a', 'b']);
+		});
+
+		it("injects 'search_document: ' prefix for inputType=document on nomic models", async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0]] }));
+			const b = new OllamaBackend({ model: 'nomic-embed-text:v1.5' }, fetch);
+			await b.embed('a doc', { accounting: ACCOUNTING, inputType: 'document' });
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.deepStrictEqual(sent.input, ['search_document: a doc']);
+		});
+
+		it("injects 'search_query: ' prefix for inputType=query on nomic models", async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0]] }));
+			const b = new OllamaBackend({ model: 'nomic-embed-text' }, fetch);
+			await b.embed('q', { accounting: ACCOUNTING, inputType: 'query' });
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.deepStrictEqual(sent.input, ['search_query: q']);
+		});
+
+		it('does not inject a prefix on non-nomic models', async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0]] }));
+			const b = new OllamaBackend({ model: 'all-MiniLM-L6-v2' }, fetch);
+			await b.embed('x', { accounting: ACCOUNTING, inputType: 'document' });
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.deepStrictEqual(sent.input, ['x']);
+		});
+
+		it('raises OllamaBackendError when the response lacks an embeddings array', async () => {
+			const fetch = mockFetch(() => jsonResponse({ no: 'embeddings' }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(() => b.embed('x', { accounting: ACCOUNTING }), OllamaBackendError);
+		});
+
+		it('raises OllamaBackendError on non-2xx HTTP', async () => {
+			const fetch = mockFetch(() => new Response('boom', { status: 500 }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(() => b.embed('x', { accounting: ACCOUNTING }), OllamaBackendError);
+		});
+
+		it('raises OllamaBackendError when response vector count differs from input count', async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(
+				() => b.embed(['a', 'b'], { accounting: ACCOUNTING }),
+				/returned 1 vectors for 2 inputs/
+			);
+		});
+
+		it('raises OllamaBackendError when a vector contains non-finite values', async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1, null, 0.3]] }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(
+				() => b.embed('x', { accounting: ACCOUNTING }),
+				/vector at index 0 is not an array of finite numbers/
+			);
+		});
+
+		it('drops non-finite / non-integer prompt_eval_count from usage', async () => {
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]], prompt_eval_count: NaN }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			const result = await b.embed('x', { accounting: ACCOUNTING });
+			assert.strictEqual(result.usage.embeddingTokens, undefined);
+		});
+
+		it('wraps non-JSON response bodies in OllamaBackendError', async () => {
+			const fetch = mockFetch(() => new Response('<html>oops</html>', { status: 200 }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(
+				() => b.embed('x', { accounting: ACCOUNTING }),
+				/Ollama \/api\/embed returned a non-JSON response body/
+			);
+		});
+	});
+
+	describe('generate', () => {
+		it('uses /api/generate with a string prompt and maps token usage', async () => {
+			const fetch = mockFetch(() =>
+				jsonResponse({
+					response: 'hi there',
+					done: true,
+					done_reason: 'stop',
+					prompt_eval_count: 5,
+					eval_count: 2,
+				})
+			);
+			const b = new OllamaBackend({ model: 'llama3.2' }, fetch);
+			const result = await b.generate('say hi', { accounting: ACCOUNTING });
+			assert.strictEqual(fetch.calls[0].url.endsWith('/api/generate'), true);
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.strictEqual(sent.prompt, 'say hi');
+			assert.strictEqual(sent.stream, false);
+			assert.strictEqual(result.output.content, 'hi there');
+			assert.strictEqual(result.output.finishReason, 'stop');
+			assert.strictEqual(result.usage.promptTokens, 5);
+			assert.strictEqual(result.usage.completionTokens, 2);
+		});
+
+		it('uses /api/chat with a messages-array input', async () => {
+			const fetch = mockFetch(() =>
+				jsonResponse({
+					message: { role: 'assistant', content: 'reply' },
+					done: true,
+					done_reason: 'stop',
+				})
+			);
+			const b = new OllamaBackend({ model: 'llama3.2' }, fetch);
+			const result = await b.generate([{ role: 'user', content: 'hi' }], { accounting: ACCOUNTING });
+			assert.strictEqual(fetch.calls[0].url.endsWith('/api/chat'), true);
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.deepStrictEqual(sent.messages, [{ role: 'user', content: 'hi' }]);
+			assert.strictEqual(result.output.content, 'reply');
+		});
+
+		it("prepends system as the first message when supplied via { messages, system }", async () => {
+			const fetch = mockFetch(() =>
+				jsonResponse({ message: { role: 'assistant', content: '' }, done: true })
+			);
+			const b = new OllamaBackend({ model: 'llama3.2' }, fetch);
+			await b.generate(
+				{ messages: [{ role: 'user', content: 'q' }], system: 'be helpful' },
+				{ accounting: ACCOUNTING }
+			);
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.deepStrictEqual(sent.messages[0], { role: 'system', content: 'be helpful' });
+			assert.deepStrictEqual(sent.messages[1], { role: 'user', content: 'q' });
+		});
+
+		it("maps responseFormat='json' to format='json'", async () => {
+			const fetch = mockFetch(() => jsonResponse({ response: '{}', done: true }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await b.generate('x', { accounting: ACCOUNTING, responseFormat: 'json' });
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.strictEqual(sent.format, 'json');
+		});
+
+		it("maps responseFormat={ schema } to Ollama's format object", async () => {
+			const fetch = mockFetch(() => jsonResponse({ response: '{}', done: true }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			const schema = { type: 'object', properties: { a: { type: 'string' } } };
+			await b.generate('x', { accounting: ACCOUNTING, responseFormat: { schema } });
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.deepStrictEqual(sent.format, schema);
+		});
+
+		it('maps temperature and maxTokens into options.num_predict / temperature', async () => {
+			const fetch = mockFetch(() => jsonResponse({ response: '', done: true }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await b.generate('x', { accounting: ACCOUNTING, temperature: 0.5, maxTokens: 100 });
+			const sent = JSON.parse(fetch.calls[0].init.body);
+			assert.deepStrictEqual(sent.options, { temperature: 0.5, num_predict: 100 });
+		});
+
+		it("maps done_reason='length' to finishReason='length'", async () => {
+			const fetch = mockFetch(() =>
+				jsonResponse({ response: 'cut', done: true, done_reason: 'length' })
+			);
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			const result = await b.generate('x', { accounting: ACCOUNTING });
+			assert.strictEqual(result.output.finishReason, 'length');
+		});
+
+		it('rejects a non-string content from /api/chat', async () => {
+			const fetch = mockFetch(() =>
+				jsonResponse({ message: { role: 'assistant', content: 42 }, done: true })
+			);
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(
+				() => b.generate([{ role: 'user', content: 'q' }], { accounting: ACCOUNTING }),
+				/response content is not a string/
+			);
+		});
+
+		it('rejects a non-string response from /api/generate', async () => {
+			const fetch = mockFetch(() => jsonResponse({ response: { nested: 'obj' }, done: true }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(
+				() => b.generate('x', { accounting: ACCOUNTING }),
+				/response content is not a string/
+			);
+		});
+
+		it('drops non-integer token counts from usage', async () => {
+			const fetch = mockFetch(() =>
+				jsonResponse({ response: 'ok', done: true, prompt_eval_count: 1.5, eval_count: -3 })
+			);
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			const result = await b.generate('x', { accounting: ACCOUNTING });
+			assert.strictEqual(result.usage.promptTokens, undefined);
+			assert.strictEqual(result.usage.completionTokens, undefined);
+		});
+	});
+
+	describe('generateStream', () => {
+		it('yields a chunk per NDJSON line with deltaContent', async () => {
+			const fetch = mockFetch(() =>
+				ndjsonResponse([
+					{ response: 'hello ' },
+					{ response: 'world' },
+					{ response: '', done: true, done_reason: 'stop' },
+				])
+			);
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			const chunks = [];
+			for await (const c of b.generateStream('q', { accounting: ACCOUNTING })) chunks.push(c);
+			assert.deepStrictEqual(chunks[0], { deltaContent: 'hello ' });
+			assert.deepStrictEqual(chunks[1], { deltaContent: 'world' });
+			assert.deepStrictEqual(chunks[2], { finishReason: 'stop' });
+		});
+
+		it('uses /api/chat shape when input is a messages array', async () => {
+			const fetch = mockFetch(() =>
+				ndjsonResponse([
+					{ message: { role: 'assistant', content: 'hi' } },
+					{ message: { role: 'assistant', content: '' }, done: true, done_reason: 'stop' },
+				])
+			);
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			const chunks = [];
+			for await (const c of b.generateStream([{ role: 'user', content: 'q' }], {
+				accounting: ACCOUNTING,
+			})) {
+				chunks.push(c);
+			}
+			assert.strictEqual(fetch.calls[0].url.endsWith('/api/chat'), true);
+			assert.strictEqual(chunks[0].deltaContent, 'hi');
+			assert.strictEqual(chunks[1].finishReason, 'stop');
+		});
+
+		it('handles NDJSON split across chunk boundaries', async () => {
+			// Emit a single JSON object across two stream chunks.
+			const body = new ReadableStream({
+				start(controller) {
+					const enc = new TextEncoder();
+					controller.enqueue(enc.encode('{"response":"hel'));
+					controller.enqueue(enc.encode('lo"}\n{"response":"","done":true}\n'));
+					controller.close();
+				},
+			});
+			const fetch = mockFetch(() => new Response(body, { status: 200 }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			const chunks = [];
+			for await (const c of b.generateStream('q', { accounting: ACCOUNTING })) chunks.push(c);
+			assert.strictEqual(chunks[0].deltaContent, 'hello');
+			assert.strictEqual(chunks[1].finishReason, 'stop');
+		});
+
+		it('throws OllamaBackendError on invalid NDJSON', async () => {
+			const body = new ReadableStream({
+				start(controller) {
+					controller.enqueue(new TextEncoder().encode('not-json\n'));
+					controller.close();
+				},
+			});
+			const fetch = mockFetch(() => new Response(body, { status: 200 }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(async () => {
+				for await (const _c of b.generateStream('q', { accounting: ACCOUNTING })) {
+					/* no-op */
+				}
+			}, OllamaBackendError);
+		});
+
+		it('uses a static message on invalid NDJSON (no upstream content in the thrown error)', async () => {
+			const body = new ReadableStream({
+				start(controller) {
+					controller.enqueue(new TextEncoder().encode('<html>oops</html>\n'));
+					controller.close();
+				},
+			});
+			const fetch = mockFetch(() => new Response(body, { status: 200 }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			try {
+				for await (const _c of b.generateStream('q', { accounting: ACCOUNTING })) {
+					/* no-op */
+				}
+				assert.fail('expected OllamaBackendError');
+			} catch (err) {
+				assert.ok(err instanceof OllamaBackendError);
+				assert.ok(!err.message.includes('<html>'), 'error message should not include upstream content');
+			}
+		});
+
+		it('throws OllamaBackendError when a stream line exceeds the byte cap', async () => {
+			// Emit > 1 MiB of bytes with no newline.
+			const huge = 'x'.repeat(1 << 20 + 1);
+			const body = new ReadableStream({
+				start(controller) {
+					controller.enqueue(new TextEncoder().encode(huge));
+					controller.close();
+				},
+			});
+			const fetch = mockFetch(() => new Response(body, { status: 200 }));
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await assert.rejects(async () => {
+				for await (const _c of b.generateStream('q', { accounting: ACCOUNTING })) {
+					/* no-op */
+				}
+			}, /NDJSON line exceeds/);
+		});
+	});
+
+	describe('AbortSignal propagation', () => {
+		it('passes the caller signal straight through when no timeout is configured', async () => {
+			const ctrl = new AbortController();
+			let seenSignal;
+			const fetch = mockFetch(({ init }) => {
+				seenSignal = init.signal;
+				return jsonResponse({ embeddings: [[0]] });
+			});
+			const b = new OllamaBackend({ model: 'm' }, fetch);
+			await b.embed('x', { accounting: ACCOUNTING, signal: ctrl.signal });
+			assert.strictEqual(seenSignal, ctrl.signal);
+		});
+
+		it('composes caller signal with per-call timeout via AbortSignal.any', async () => {
+			const ctrl = new AbortController();
+			let seenSignal;
+			const fetch = mockFetch(({ init }) => {
+				seenSignal = init.signal;
+				return jsonResponse({ embeddings: [[0]] });
+			});
+			const b = new OllamaBackend({ model: 'm', requestTimeoutMs: 10000 }, fetch);
+			await b.embed('x', { accounting: ACCOUNTING, signal: ctrl.signal });
+			assert.ok(seenSignal instanceof AbortSignal);
+			// AbortSignal.any returns a new signal distinct from both inputs.
+			assert.notStrictEqual(seenSignal, ctrl.signal);
+		});
+
+		it('aborts when the caller signal aborts (composed-signal case)', async () => {
+			const ctrl = new AbortController();
+			const fetch = mockFetch(
+				({ init }) =>
+					new Promise((_resolve, reject) => {
+						init.signal.addEventListener('abort', () =>
+							reject(Object.assign(new Error('aborted'), { name: 'AbortError' }))
+						);
+					})
+			);
+			const b = new OllamaBackend({ model: 'm', requestTimeoutMs: 10000 }, fetch);
+			const pending = b.embed('x', { accounting: ACCOUNTING, signal: ctrl.signal });
+			ctrl.abort();
+			await assert.rejects(pending, /aborted/);
+		});
+	});
+});
+
+describe('registerOllamaBackend', () => {
+	beforeEach(() => clearRegistry());
+
+	it('registers as an embedding backend under the logical name', () => {
+		registerOllamaBackend({ logicalName: 'fast', kind: 'embedding', config: { model: 'm' } });
+		const b = resolveEmbedding('fast');
+		assert.strictEqual(b.name, 'ollama');
+	});
+
+	it('registers as a generative backend under the logical name', () => {
+		registerOllamaBackend({ logicalName: 'default', kind: 'generative', config: { model: 'm' } });
+		const b = resolveGenerative('default');
+		assert.strictEqual(b.name, 'ollama');
+	});
+});
diff --git a/unitTests/resources/models/bootstrap.test.js b/unitTests/resources/models/bootstrap.test.js
new file mode 100644
index 000000000..4a2ca40ac
--- /dev/null
+++ b/unitTests/resources/models/bootstrap.test.js
@@ -0,0 +1,97 @@
+'use strict';
+
+const assert = require('node:assert/strict');
+const { bootstrapModels } = require('#src/resources/models/bootstrap');
+const {
+	clearRegistry,
+	resolveEmbedding,
+	resolveGenerative,
+	ModelBackendNotFoundError,
+} = require('#src/resources/models/backendRegistry');
+
+describe('bootstrapModels', () => {
+	beforeEach(() => clearRegistry());
+
+	it('is a no-op when rootConfig is undefined/null', () => {
+		bootstrapModels(undefined);
+		bootstrapModels(null);
+		assert.throws(() => resolveEmbedding('default'), ModelBackendNotFoundError);
+	});
+
+	it('is a no-op when rootConfig.models is absent', () => {
+		bootstrapModels({});
+		assert.throws(() => resolveEmbedding('default'), ModelBackendNotFoundError);
+	});
+
+	it('registers an ollama embedding entry under its logical name', () => {
+		bootstrapModels({
+			models: {
+				embedding: {
+					fast: { backend: 'ollama', host: 'localhost:11434', model: 'nomic-embed-text' },
+				},
+			},
+		});
+		const backend = resolveEmbedding('fast');
+		assert.strictEqual(backend.name, 'ollama');
+	});
+
+	it('registers an ollama generative entry under its logical name', () => {
+		bootstrapModels({
+			models: {
+				generative: {
+					default: { backend: 'ollama', host: 'localhost:11434', model: 'llama3.2' },
+				},
+			},
+		});
+		const backend = resolveGenerative('default');
+		assert.strictEqual(backend.name, 'ollama');
+	});
+
+	it('skips entries with unknown backend without throwing', () => {
+		bootstrapModels({
+			models: {
+				embedding: {
+					default: { backend: 'magic-backend', model: 'm' },
+				},
+				generative: {
+					default: { backend: 'ollama', model: 'm' },
+				},
+			},
+		});
+		// The ollama entry on generative still registered.
+		assert.strictEqual(resolveGenerative('default').name, 'ollama');
+		// The unknown-backend embedding entry was skipped, not registered.
+		assert.throws(() => resolveEmbedding('default'), ModelBackendNotFoundError);
+	});
+
+	it('skips entries that are not objects', () => {
+		bootstrapModels({
+			models: {
+				embedding: {
+					bad: 'just a string',
+					good: { backend: 'ollama', model: 'm' },
+				},
+			},
+		});
+		assert.strictEqual(resolveEmbedding('good').name, 'ollama');
+		assert.throws(() => resolveEmbedding('bad'), ModelBackendNotFoundError);
+	});
+
+	it('skips entries missing a backend field', () => {
+		bootstrapModels({ models: { embedding: { x: { model: 'm' } } } });
+		assert.throws(() => resolveEmbedding('x'), ModelBackendNotFoundError);
+	});
+
+	it('registers multiple logical names independently', () => {
+		bootstrapModels({
+			models: {
+				generative: {
+					default: { backend: 'ollama', host: 'a:1', model: 'mA' },
+					fast: { backend: 'ollama', host: 'b:2', model: 'mB' },
+				},
+			},
+		});
+		assert.strictEqual(resolveGenerative('default').name, 'ollama');
+		assert.strictEqual(resolveGenerative('fast').name, 'ollama');
+	});
+});
diff --git a/unitTests/validation/configValidator.test.js b/unitTests/validation/configValidator.test.js
index ead5294c9..684e16d2a 100644
--- a/unitTests/validation/configValidator.test.js
+++ b/unitTests/validation/configValidator.test.js
@@ -385,4 +385,107 @@ describe('Test configValidator module', () => {
 			"Invalid logging.rotation.interval value. Value should be a number followed by unit e.g. '10D'"
 		);
 	});
+
+	// #629 (Phase 2 of #510): models config block.
+	describe('models config', () => {
+		function baseConfig() {
+			return testUtils.deepClone(FAKE_CONFIG);
+		}
+
+		it('validates clean when the models block is absent', () => {
+			const result = configValidator(baseConfig(), true);
+			expect(result.error).to.be.undefined;
+			expect(result.value.models).to.be.undefined;
+		});
+
+		it('accepts an empty models block', () => {
+			const config = baseConfig();
+			config.models = {};
+			const result = configValidator(config, true);
+			expect(result.error).to.be.undefined;
+		});
+
+		it('accepts an ollama embedding entry with host + model', () => {
+			const config = baseConfig();
+			config.models = {
+				embedding: {
+					default: { backend: 'ollama', host: 'localhost:11434', model: 'nomic-embed-text' },
+				},
+			};
+			const result = configValidator(config, true);
+			expect(result.error).to.be.undefined;
+		});
+
+		it('accepts a generative entry with requestTimeoutMs', () => {
+			const config = baseConfig();
+			config.models = {
+				generative: {
+					fast: { backend: 'ollama', model: 'llama3.2', requestTimeoutMs: 30000 },
+				},
+			};
+			const result = configValidator(config, true);
+			expect(result.error).to.be.undefined;
+		});
+
+		it('rejects entries missing a backend discriminator', () => {
+			const config = baseConfig();
+			config.models = { embedding: { default: { model: 'm' } } };
+			const result = configValidator(config, true);
+			expect(result.error).to.not.be.undefined;
+			expect(result.error.message).to.include('backend');
+		});
+
+		it('rejects a non-numeric requestTimeoutMs', () => {
+			const config = baseConfig();
+			config.models = {
+				generative: { default: { backend: 'ollama', model: 'm', requestTimeoutMs: 'soon' } },
+			};
+			const result = configValidator(config, true);
+			expect(result.error).to.not.be.undefined;
+		});
+
+		it('rejects a negative requestTimeoutMs', () => {
+			const config = baseConfig();
+			config.models = {
+				generative: { default: { backend: 'ollama', model: 'm', requestTimeoutMs: -1 } },
+			};
+			const result = configValidator(config, true);
+			expect(result.error).to.not.be.undefined;
+		});
+
+		it('rejects requestTimeoutMs: 0 (omit the field for "no timeout")', () => {
+			const config = baseConfig();
+			config.models = {
+				generative: { default: { backend: 'ollama', model: 'm', requestTimeoutMs: 0 } },
+			};
+			const result = configValidator(config, true);
+			expect(result.error).to.not.be.undefined;
+		});
+
+		it('rejects unknown fields inside a model entry (typo guard)', () => {
+			const config = baseConfig();
+			config.models = {
+				generative: { default: { backend: 'ollama', model: 'm', bakend: 'oops' } },
+			};
+			const result = configValidator(config, true);
+			expect(result.error).to.not.be.undefined;
+			expect(result.error.message).to.include('bakend');
+		});
+
+		it('accepts multiple logical names per kind', () => {
+			const config = baseConfig();
+			config.models = {
+				embedding: {
+					default: { backend: 'ollama', model: 'm1' },
+					high_quality: { backend: 'ollama', model: 'm2' },
+				},
+				generative: {
+					default: { backend: 'ollama', model: 'g1' },
+					fast: { backend: 'ollama', model: 'g2' },
+				},
+			};
+			const result = configValidator(config, true);
+			expect(result.error).to.be.undefined;
+		});
+	});
 });

From 1381bced97364f3cc9a3fa2ac0d224417a82e401 Mon Sep 17 00:00:00 2001
From: Nathan Heskew <nathan@harperdb.io>
Date: Wed, 20 May 2026 15:12:27 -0700
Subject: [PATCH 5/6] fix(models): CI green for Phase 2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Run prettier on the four ollama-related files (the formatting that the
  CI Format Check enforces was not applied locally before push).
- Defer the integration test's `OllamaBackend` import to a dynamic
  `await import(...)` inside `before()`. Statically importing from
  `components/ollama/` triggers a pre-existing CJS require cycle
  (`utility/common_utils.ts` ↔ `utility/logging/harper_logger.ts`) when
  loaded by `node --test`, which is fatal on Node 22+
  (`ERR_REQUIRE_CYCLE_MODULE`). Other integration tests don't hit it
  because they only import from `@harperfast/integration-testing` and
  spawn Harper as a subprocess.

Tracking: #629, #510

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 components/ollama/index.ts                    | 16 +++---
 .../server/ollama-backend.test.ts             | 54 +++++++++++++------
 resources/models/bootstrap.ts                 |  8 +--
 unitTests/components/ollama/index.test.js     | 30 +++--------
 4 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/components/ollama/index.ts b/components/ollama/index.ts
index 723f578b3..aa24935b1 100644
--- a/components/ollama/index.ts
+++ b/components/ollama/index.ts
@@ -227,11 +227,7 @@ function buildOptionsBag(opts: BackendOpts<GenerateOpts>): Record<string, unknow
 	if (Object.keys(options).length > 0) out.options = options;
 	if (opts.responseFormat === 'json') {
 		out.format = 'json';
-	} else if (
-		opts.responseFormat &&
-		typeof opts.responseFormat === 'object' &&
-		'schema' in opts.responseFormat
-	) {
+	} else if (opts.responseFormat && typeof opts.responseFormat === 'object' && 'schema' in opts.responseFormat) {
 		out.format = opts.responseFormat.schema;
 	}
 	return out;
@@ -271,9 +267,7 @@ async function* readNdjson(body: ReadableStream<Uint8Array>): AsyncGenerator<Oll
 	for await (const chunk of body as unknown as AsyncIterable<Uint8Array>) {
 		buf += decoder.decode(chunk, { stream: true });
 		if (buf.length > MAX_NDJSON_LINE_BYTES) {
-			throw new OllamaBackendError(
-				`Ollama NDJSON line exceeds ${MAX_NDJSON_LINE_BYTES} bytes without a newline`
-			);
+			throw new OllamaBackendError(`Ollama NDJSON line exceeds ${MAX_NDJSON_LINE_BYTES} bytes without a newline`);
 		}
 		let nl: number;
 		while ((nl = buf.indexOf('\n')) >= 0) {
@@ -318,7 +312,11 @@ async function parseJsonResponse<T>(res: Response, endpoint: string): Promise<T>
  * any of which would poison `SUM(prompt_tokens)`-style aggregates over
  * `hdb_model_calls`.
  */
-function assignFiniteTokenCount(usage: TokenUsage, key: 'promptTokens' | 'completionTokens' | 'embeddingTokens', value: unknown): void {
+function assignFiniteTokenCount(
+	usage: TokenUsage,
+	key: 'promptTokens' | 'completionTokens' | 'embeddingTokens',
+	value: unknown
+): void {
 	if (typeof value !== 'number') return;
 	if (!Number.isFinite(value) || value < 0 || !Number.isInteger(value)) return;
 	usage[key] = value;
diff --git a/integrationTests/server/ollama-backend.test.ts b/integrationTests/server/ollama-backend.test.ts
index d753417ed..69100d340 100644
--- a/integrationTests/server/ollama-backend.test.ts
+++ b/integrationTests/server/ollama-backend.test.ts
@@ -22,7 +22,26 @@
 import { suite, test, before } from 'node:test';
 import { strictEqual, ok } from 'node:assert/strict';
 
-import { OllamaBackend } from '../../components/ollama/index.ts';
+// NOTE: `OllamaBackend` is imported dynamically inside `before()` rather than
+// at the top of the file. Statically importing it from `components/ollama/`
+// triggers a pre-existing require cycle in Harper's CommonJS graph
+// (`utility/common_utils.ts` ↔ `utility/logging/harper_logger.ts`) when this
+// test file is loaded by `node --test`, which is fatal on Node 22+ (ERR_REQUIRE_CYCLE_MODULE).
+// Other integration tests don't hit it because they only import the
+// `@harperfast/integration-testing` package and spawn Harper as a subprocess.
+// Deferring the import past the static graph build sidesteps the cycle.
+
+type OllamaBackendCtor = new (
+	config: { host?: string; model?: string; requestTimeoutMs?: number },
+	fetchImpl?: typeof fetch
+) => {
+	embed: (input: string | string[], opts: object) => Promise<{ status: string; output: Float32Array[] }>;
+	generate: (
+		input: unknown,
+		opts: object
+	) => Promise<{ status: string; output: { content: string; finishReason: string } }>;
+	generateStream: (input: unknown, opts: object) => AsyncIterable<{ deltaContent?: string; finishReason?: string }>;
+};
 
 const OLLAMA_HOST = process.env.OLLAMA_HOST ?? 'http://localhost:11434';
 const EMBED_MODEL = process.env.OLLAMA_EMBED_MODEL ?? 'nomic-embed-text';
@@ -47,10 +66,11 @@ async function reachable(): Promise<boolean> {
 const skip = !(await reachable());
 
 suite('OllamaBackend against a real Ollama instance', { skip }, () => {
-	let backend: OllamaBackend;
+	let backend: InstanceType<OllamaBackendCtor>;
 
-	before(() => {
-		backend = new OllamaBackend({ host: OLLAMA_HOST.replace(/^https?:\/\//, '') });
+	before(async () => {
+		const mod = (await import('../../components/ollama/index.ts')) as { OllamaBackend: OllamaBackendCtor };
+		backend = new mod.OllamaBackend({ host: OLLAMA_HOST.replace(/^https?:\/\//, '') });
 	});
 
 	test('embed returns a non-empty Float32Array vector', async () => {
@@ -87,10 +107,12 @@ suite('OllamaBackend against a real Ollama instance', { skip }, () => {
 	});
 
 	test('generate via chat shape (messages array) produces non-empty content', async () => {
-		const result = await backend.generate(
-			[{ role: 'user', content: 'Reply with the single word OK.' }],
-			{ accounting: ACCOUNTING, model: GENERATE_MODEL, maxTokens: 10, temperature: 0 }
-		);
+		const result = await backend.generate([{ role: 'user', content: 'Reply with the single word OK.' }], {
+			accounting: ACCOUNTING,
+			model: GENERATE_MODEL,
+			maxTokens: 10,
+			temperature: 0,
+		});
 		strictEqual(result.status, 'completed');
 		ok(typeof result.output.content === 'string' && result.output.content.length > 0);
 	});
@@ -114,13 +136,15 @@ suite('OllamaBackend against a real Ollama instance', { skip }, () => {
 
 	test('AbortSignal cancels an in-flight stream', async () => {
 		const ctrl = new AbortController();
-		const iter = backend.generateStream('Write a long paragraph about the ocean.', {
-			accounting: ACCOUNTING,
-			model: GENERATE_MODEL,
-			signal: ctrl.signal,
-			maxTokens: 1000,
-			temperature: 0.5,
-		})[Symbol.asyncIterator]();
+		const iter = backend
+			.generateStream('Write a long paragraph about the ocean.', {
+				accounting: ACCOUNTING,
+				model: GENERATE_MODEL,
+				signal: ctrl.signal,
+				maxTokens: 1000,
+				temperature: 0.5,
+			})
+			[Symbol.asyncIterator]();
 		// Get one chunk to confirm the stream started, then abort.
 		await iter.next();
 		ctrl.abort();
diff --git a/resources/models/bootstrap.ts b/resources/models/bootstrap.ts
index d223bfa29..1cb4c6b04 100644
--- a/resources/models/bootstrap.ts
+++ b/resources/models/bootstrap.ts
@@ -69,17 +69,13 @@ function registerKind(kind: ModelKind, entries: Record<string, ModelEntry> | und
 			// typo guards (`.unknown(false)` on modelEntrySchema) catch field-name
 			// typos before this point; reaching here means `backend:` itself names
 			// a type Harper doesn't ship a factory for in this version.
-			harperLogger.error(
-				`models.${kind}.${logicalName}: unknown backend '${entry.backend ?? '(missing)'}'; skipping`
-			);
+			harperLogger.error(`models.${kind}.${logicalName}: unknown backend '${entry.backend ?? '(missing)'}'; skipping`);
 			continue;
 		}
 		try {
 			factory({ logicalName, kind, config: entry });
 		} catch (err) {
-			harperLogger.error(
-				`models.${kind}.${logicalName}: registration failed (${(err as Error)?.message ?? err})`
-			);
+			harperLogger.error(`models.${kind}.${logicalName}: registration failed (${(err as Error)?.message ?? err})`);
 		}
 	}
 }
diff --git a/unitTests/components/ollama/index.test.js b/unitTests/components/ollama/index.test.js
index 08dc2815f..adebed83f 100644
--- a/unitTests/components/ollama/index.test.js
+++ b/unitTests/components/ollama/index.test.js
@@ -81,9 +81,7 @@ describe('OllamaBackend', () => {
 
 	describe('embed', () => {
 		it('POSTs to /api/embed with the configured model and Float32Array output', async () => {
-			const fetch = mockFetch(() =>
-				jsonResponse({ embeddings: [[0.1, 0.2, 0.3]], prompt_eval_count: 3 })
-			);
+			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1, 0.2, 0.3]], prompt_eval_count: 3 }));
 			const b = new OllamaBackend({ model: 'nomic-embed-text' }, fetch);
 			const result = await b.embed('hello', { accounting: ACCOUNTING });
 			assert.strictEqual(result.status, 'completed');
@@ -160,10 +158,7 @@ describe('OllamaBackend', () => {
 		it('raises OllamaBackendError when response vector count differs from input count', async () => {
 			const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] }));
 			const b = new OllamaBackend({ model: 'm' }, fetch);
-			await assert.rejects(
-				() => b.embed(['a', 'b'], { accounting: ACCOUNTING }),
-				/returned 1 vectors for 2 inputs/
-			);
+			await assert.rejects(() => b.embed(['a', 'b'], { accounting: ACCOUNTING }), /returned 1 vectors for 2 inputs/);
 		});
 
 		it('raises OllamaBackendError when a vector contains non-finite values', async () => {
@@ -231,10 +226,8 @@ describe('OllamaBackend', () => {
 			assert.strictEqual(result.output.content, 'reply');
 		});
 
-		it("prepends system as the first message when supplied via { messages, system }", async () => {
-			const fetch = mockFetch(() =>
-				jsonResponse({ message: { role: 'assistant', content: '' }, done: true })
-			);
+		it('prepends system as the first message when supplied via { messages, system }', async () => {
+			const fetch = mockFetch(() => jsonResponse({ message: { role: 'assistant', content: '' }, done: true }));
 			const b = new OllamaBackend({ model: 'llama3.2' }, fetch);
 			await b.generate(
 				{ messages: [{ role: 'user', content: 'q' }], system: 'be helpful' },
@@ -271,18 +264,14 @@ describe('OllamaBackend', () => {
 		});
 
 		it("maps done_reason='length' to finishReason='length'", async () => {
-			const fetch = mockFetch(() =>
-				jsonResponse({ response: 'cut', done: true, done_reason: 'length' })
-			);
+			const fetch = mockFetch(() => jsonResponse({ response: 'cut', done: true, done_reason: 'length' }));
 			const b = new OllamaBackend({ model: 'm' }, fetch);
 			const result = await b.generate('x', { accounting: ACCOUNTING });
 			assert.strictEqual(result.output.finishReason, 'length');
 		});
 
 		it('rejects a non-string content from /api/chat', async () => {
-			const fetch = mockFetch(() =>
-				jsonResponse({ message: { role: 'assistant', content: 42 }, done: true })
-			);
+			const fetch = mockFetch(() => jsonResponse({ message: { role: 'assistant', content: 42 }, done: true }));
 			const b = new OllamaBackend({ model: 'm' }, fetch);
 			await assert.rejects(
 				() => b.generate([{ role: 'user', content: 'q' }], { accounting: ACCOUNTING }),
@@ -293,10 +282,7 @@ describe('OllamaBackend', () => {
 		it('rejects a non-string response from /api/generate', async () => {
 			const fetch = mockFetch(() => jsonResponse({ response: { nested: 'obj' }, done: true }));
 			const b = new OllamaBackend({ model: 'm' }, fetch);
-			await assert.rejects(
-				() => b.generate('x', { accounting: ACCOUNTING }),
-				/response content is not a string/
-			);
+			await assert.rejects(() => b.generate('x', { accounting: ACCOUNTING }), /response content is not a string/);
 		});
 
 		it('drops non-integer token counts from usage', async () => {
@@ -402,7 +388,7 @@ describe('OllamaBackend', () => {
 
 		it('throws OllamaBackendError when a stream line exceeds the byte cap', async () => {
 			// Emit > 1 MiB of bytes with no newline.
-			const huge = 'x'.repeat(1 << 20 + 1);
+			const huge = 'x'.repeat(1 << (20 + 1));
 			const body = new ReadableStream({
 				start(controller) {
 					controller.enqueue(new TextEncoder().encode(huge));

From ddb8c19f60575369d8c888a26ca40a7e62d3e191 Mon Sep 17 00:00:00 2001
From: Nathan Heskew <nathan@harperdb.io>
Date: Wed, 20 May 2026 15:28:48 -0700
Subject: [PATCH 6/6] fix(models): real abort assertion + correct oversize-line
 byte count
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two findings from claude-bot's inline PR review on #651:

- Integration `'AbortSignal cancels an in-flight stream'`: replace
  `ok(rejected || true)` (a tautology — asserts nothing) with a real
  termination check. Race the iterator drain against a 5 s deadline so
  the actual failure mode being guarded (hung stream after abort) fails
  the test instead of timing the suite out.
- Unit `'throws OllamaBackendError when a stream line exceeds the byte
  cap'`: `1 << 20 + 1` evaluated to `1 << 21` (2 MiB) due to operator
  precedence — `+` binds tighter than `<<`. Prettier's autofix only made
  that grouping explicit (`1 << (20 + 1)`, same value); it did not fix it.
  Re-parenthesize to `(1 << 20) + 1` (1 MiB + 1 byte) — exactly one
  byte past the cap, the comment now matches the allocation, and the
  test memory footprint halves.

Tracking: #629, #510

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../server/ollama-backend.test.ts             | 35 ++++++++++++-------
 unitTests/components/ollama/index.test.js     |  7 ++--
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/integrationTests/server/ollama-backend.test.ts b/integrationTests/server/ollama-backend.test.ts
index 69100d340..eb0ee9fc1 100644
--- a/integrationTests/server/ollama-backend.test.ts
+++ b/integrationTests/server/ollama-backend.test.ts
@@ -148,18 +148,29 @@ suite('OllamaBackend against a real Ollama instance', { skip }, () => {
 		// Get one chunk to confirm the stream started, then abort.
 		await iter.next();
 		ctrl.abort();
-		// Subsequent reads should reject (AbortError) — accept either rejection
-		// or premature done since fetch may swallow either path.
-		let rejected = false;
-		try {
-			while (true) {
-				const next = await iter.next();
-				if (next.done) break;
+		// After abort, the iterator must terminate — either by rejecting
+		// (AbortError / abort-flavored error) or by reaching `done`. The
+		// real failure mode this guards against is the stream hanging,
+		// where neither happens. Race a 5 s deadline so a hang fails the
+		// test instead of timing the suite out.
+		const drain = (async () => {
+			try {
+				while (true) {
+					const next = await iter.next();
+					if (next.done) return 'done' as const;
+				}
+			} catch (err) {
+				const name = (err as Error).name;
+				const isAbort = name === 'AbortError' || /abort/i.test(String(err));
+				return isAbort ? ('aborted' as const) : ('errored' as const);
 			}
-		} catch (err) {
-			rejected = (err as Error).name === 'AbortError' || /abort/i.test(String(err));
-		}
-		// Either an abort error fired, or the iterator terminated quickly post-abort.
-		ok(rejected || true);
+		})();
+		const HANG = Symbol('hang');
+		const deadline = new Promise<typeof HANG>((resolve) => setTimeout(() => resolve(HANG), 5000));
+		const outcome = await Promise.race([drain, deadline]);
+		ok(
+			outcome === 'done' || outcome === 'aborted',
+			`expected abort to terminate stream (done or AbortError); got ${String(outcome)}`
+		);
 	});
 });
diff --git a/unitTests/components/ollama/index.test.js b/unitTests/components/ollama/index.test.js
index adebed83f..da0a2f71e 100644
--- a/unitTests/components/ollama/index.test.js
+++ b/unitTests/components/ollama/index.test.js
@@ -387,8 +387,11 @@ describe('OllamaBackend', () => {
 		});
 
 		it('throws OllamaBackendError when a stream line exceeds the byte cap', async () => {
-			// Emit > 1 MiB of bytes with no newline.
-			const huge = 'x'.repeat(1 << (20 + 1));
+			// Emit just over 1 MiB of bytes with no newline. The parens matter:
+			// `+` binds tighter than `<<`, so the original `1 << 20 + 1` (and
+			// prettier's autofix `1 << (20 + 1)`) both evaluate to `1 << 21`
+			// (2 MiB). We want `(1 << 20) + 1` — exactly one byte past the cap.
+			const huge = 'x'.repeat((1 << 20) + 1);
 			const body = new ReadableStream({
 				start(controller) {
 					controller.enqueue(new TextEncoder().encode(huge));