From bf7f32fea2246a17968cf4f7a3be0175dc0b1cb3 Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Wed, 20 May 2026 14:45:23 -0700 Subject: [PATCH 1/6] feat(models): add ollama backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First real `ModelBackend` against the Ollama HTTP API: `embed` via `/api/embed`, `generate` via `/api/generate` or `/api/chat`, and `generateStream` via NDJSON over chunked HTTP. Backend lives under `components/<name>/` matching the pattern from the MCP foundation (PR #649) — core imports `registerOllamaBackend` and calls it during boot; the file is not a `handleApplication(scope)` self-loader. Capabilities advertise `tools: false` and `adapters: false`. Ollama tool support exists on some models but is uneven across the catalog; the v1 portability guarantee keeps it off here. Validates the Phase 1 (#628 / PR #638) `ModelBackend` interface against a non-trivial real provider without external dependencies in CI. Tracking: #629, #510 Co-Authored-By: Claude Opus 4.7 (1M context) --- components/ollama/index.ts | 353 +++++++++++++++++++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100644 components/ollama/index.ts diff --git a/components/ollama/index.ts b/components/ollama/index.ts new file mode 100644 index 000000000..723f578b3 --- /dev/null +++ b/components/ollama/index.ts @@ -0,0 +1,353 @@ +/** + * Ollama backend (#629, Phase 2 of #510). + * + * Implements `ModelBackend` against a local or remote Ollama HTTP API. + * Exports `OllamaBackend` directly for tests and `registerOllamaBackend(...)` + * for the YAML→registry boot bridge in `resources/models/bootstrap.ts`. + * + * Component shape matches the pattern in `components/mcp/index.ts` (PR #649): + * core imports a register helper and calls it during boot; not a + * `handleApplication(scope)` self-loader. 
+ */ +import { setEmbedding, setGenerative } from '../../resources/models/backendRegistry.ts'; +import { ServerError } from '../../utility/errors/hdbError.ts'; +import type { + BackendOpts, + EmbedOpts, + GenerateChunk, + GenerateInput, + GenerateOpts, + GenerateResult, + Message, + ModelBackend, + ModelCallResult, + ModelCapabilities, + TokenUsage, +} from '../../resources/models/types.ts'; + +const DEFAULT_HOST = 'localhost:11434'; +const MAX_NDJSON_LINE_BYTES = 1 << 20; // 1 MiB — Ollama chunks are normally sub-KiB; anything larger is pathological. + +export type OllamaBackendKind = 'embedding' | 'generative'; + +export interface OllamaBackendConfig { + /** Host:port (default `localhost:11434`) or full origin (`https://ollama.example.com`). */ + host?: string; + /** Default model when the caller doesn't pass `opts.model`. */ + model?: string; + /** Per-request timeout. When set, combined with `opts.signal` via `AbortSignal.any`. */ + requestTimeoutMs?: number; +} + +/** + * `ModelBackend` implementation talking to Ollama's HTTP API. + * + * - `embed` → `POST /api/embed` (the legacy `/api/embeddings` is deprecated upstream). + * - `generate` → `POST /api/generate` for string prompts, `POST /api/chat` for + * messages-array input. + * - `generateStream` → same routing as `generate` with `stream: true`; consumes + * Ollama's NDJSON wire format and yields `GenerateChunk` per JSON line. + * + * Capabilities advertise `tools: false` and `adapters: false`. Ollama tool-call + * support exists on some models but is uneven across the model catalog; we keep + * the v1 portability guarantee honest and skip them here. 
+ */ +export class OllamaBackend implements ModelBackend { + readonly name = 'ollama'; + readonly #origin: string; + readonly #defaultModel?: string; + readonly #requestTimeoutMs?: number; + readonly #fetch: typeof fetch; + + constructor(config: OllamaBackendConfig = {}, fetchImpl: typeof fetch = fetch) { + this.#origin = normalizeOrigin(config.host); + this.#defaultModel = config.model; + this.#requestTimeoutMs = config.requestTimeoutMs; + this.#fetch = fetchImpl; + } + + capabilities(): ModelCapabilities { + return { embed: true, generate: true, stream: true, tools: false, adapters: false }; + } + + async embed(input: string | string[], opts: BackendOpts): Promise> { + const model = opts.model ?? this.#defaultModel; + requireModel(model, 'embed'); + const texts = Array.isArray(input) ? input : [input]; + const prepared = texts.map((t) => applyEmbedPrefix(model, t, opts.inputType)); + const res = await this.#post('/api/embed', { model, input: prepared }, opts.signal); + const data = await parseJsonResponse(res, '/api/embed'); + if (!Array.isArray(data.embeddings)) { + throw new OllamaBackendError("Ollama /api/embed response missing 'embeddings' array"); + } + if (data.embeddings.length !== prepared.length) { + throw new OllamaBackendError( + `Ollama /api/embed returned ${data.embeddings.length} vectors for ${prepared.length} inputs` + ); + } + const output = data.embeddings.map((v, i) => { + if (!Array.isArray(v) || !v.every(Number.isFinite)) { + throw new OllamaBackendError(`Ollama /api/embed vector at index ${i} is not an array of finite numbers`); + } + return Float32Array.from(v); + }); + const usage: TokenUsage = {}; + assignFiniteTokenCount(usage, 'embeddingTokens', data.prompt_eval_count); + return { status: 'completed', output, usage }; + } + + async generate(input: GenerateInput, opts: BackendOpts): Promise> { + const model = opts.model ?? 
this.#defaultModel; + requireModel(model, 'generate'); + const { endpoint, body } = buildGenerateRequest(model, input, opts, false); + const res = await this.#post(endpoint, body, opts.signal); + const data = await parseJsonResponse(res, endpoint); + const rawContent = endpoint === '/api/chat' ? data.message?.content : data.response; + if (rawContent !== undefined && typeof rawContent !== 'string') { + throw new OllamaBackendError(`Ollama ${endpoint} response content is not a string`); + } + const usage: TokenUsage = {}; + assignFiniteTokenCount(usage, 'promptTokens', data.prompt_eval_count); + assignFiniteTokenCount(usage, 'completionTokens', data.eval_count); + return { + status: 'completed', + output: { content: rawContent ?? '', finishReason: mapFinishReason(data.done_reason) }, + usage, + }; + } + + async *generateStream(input: GenerateInput, opts: BackendOpts): AsyncIterable { + const model = opts.model ?? this.#defaultModel; + requireModel(model, 'generateStream'); + const { endpoint, body } = buildGenerateRequest(model, input, opts, true); + const res = await this.#post(endpoint, body, opts.signal); + if (!res.body) throw new OllamaBackendError(`Ollama ${endpoint} returned no body for streaming`); + for await (const obj of readNdjson(res.body)) { + yield toGenerateChunk(obj, endpoint); + } + } + + async #post(path: string, body: object, callerSignal?: AbortSignal): Promise { + const signal = composeSignal(callerSignal, this.#requestTimeoutMs); + const res = await this.#fetch(`${this.#origin}${path}`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body), + signal, + }); + if (!res.ok) { + throw new OllamaBackendError(`Ollama ${path} returned HTTP ${res.status}`); + } + return res; + } +} + +/** + * Boot-bridge helper. Called from `resources/models/bootstrap.ts` for each + * `models.embedding.` / `models.generative.` entry whose + * `backend: ollama`. 
+ */ +export function registerOllamaBackend(args: { + logicalName: string; + kind: OllamaBackendKind; + config: OllamaBackendConfig; +}): void { + const backend = new OllamaBackend(args.config); + if (args.kind === 'embedding') setEmbedding(args.logicalName, backend); + else setGenerative(args.logicalName, backend); +} + +export class OllamaBackendError extends ServerError { + constructor(message: string) { + super(message); + this.name = 'OllamaBackendError'; + } +} + +// ---------- internals ---------- + +function normalizeOrigin(host?: string): string { + const value = host?.trim() || DEFAULT_HOST; + const withScheme = /^https?:\/\//i.test(value) ? value : `http://${value}`; + return withScheme.replace(/\/+$/, ''); +} + +function requireModel(model: string | undefined, op: string): asserts model is string { + if (!model) throw new OllamaBackendError(`No model specified for ${op}; set 'model' in config or pass opts.model`); +} + +function composeSignal(caller?: AbortSignal, timeoutMs?: number): AbortSignal | undefined { + if (!timeoutMs) return caller; + const timeout = AbortSignal.timeout(timeoutMs); + if (!caller) return timeout; + return AbortSignal.any([caller, timeout]); +} + +function applyEmbedPrefix(model: string, text: string, inputType?: 'document' | 'query'): string { + if (!inputType) return text; + // nomic-embed-text v1.5+ uses these application-layer prefixes to distinguish + // document-corpus encodings from query encodings. Models that don't recognize + // them silently return slightly different (still usable) vectors. Other model + // families (BGE, e5, etc.) use their own conventions; add cases as we validate. + if (/nomic-embed-text/i.test(model)) { + return (inputType === 'document' ? 
'search_document: ' : 'search_query: ') + text; + } + return text; +} + +interface BuiltRequest { + endpoint: '/api/generate' | '/api/chat'; + body: Record; +} + +function buildGenerateRequest( + model: string, + input: GenerateInput, + opts: BackendOpts, + stream: boolean +): BuiltRequest { + const optionsBag = buildOptionsBag(opts); + if (typeof input === 'string') { + return { endpoint: '/api/generate', body: { model, prompt: input, stream, ...optionsBag } }; + } + const { messages, system } = normalizeMessages(input); + // Ollama chat has no top-level system field; prepend it as the first message + // when the caller supplied one separately. + const chatMessages = system + ? [{ role: 'system' as const, content: system }, ...messages.map(toOllamaMessage)] + : messages.map(toOllamaMessage); + return { endpoint: '/api/chat', body: { model, messages: chatMessages, stream, ...optionsBag } }; +} + +function buildOptionsBag(opts: BackendOpts): Record { + const out: Record = {}; + const options: Record = {}; + if (typeof opts.temperature === 'number') options.temperature = opts.temperature; + if (typeof opts.maxTokens === 'number') options.num_predict = opts.maxTokens; + if (Object.keys(options).length > 0) out.options = options; + if (opts.responseFormat === 'json') { + out.format = 'json'; + } else if ( + opts.responseFormat && + typeof opts.responseFormat === 'object' && + 'schema' in opts.responseFormat + ) { + out.format = opts.responseFormat.schema; + } + return out; +} + +function normalizeMessages(input: Exclude): { messages: Message[]; system?: string } { + if (Array.isArray(input)) return { messages: input }; + return { messages: input.messages, system: input.system }; +} + +function toOllamaMessage(m: Message): { role: string; content: string } { + // Tools intentionally not forwarded — see capabilities().tools = false. 
+ return { role: m.role, content: m.content }; +} + +function mapFinishReason(reason?: string): GenerateResult['finishReason'] { + switch (reason) { + case 'length': + return 'length'; + case 'stop': + default: + return 'stop'; + } +} + +function toGenerateChunk(data: OllamaStreamChunk, endpoint: '/api/generate' | '/api/chat'): GenerateChunk { + const chunk: GenerateChunk = {}; + const deltaContent = endpoint === '/api/chat' ? data.message?.content : data.response; + if (typeof deltaContent === 'string' && deltaContent.length > 0) chunk.deltaContent = deltaContent; + if (data.done === true) chunk.finishReason = mapFinishReason(data.done_reason); + return chunk; +} + +async function* readNdjson(body: ReadableStream): AsyncGenerator { + const decoder = new TextDecoder('utf-8'); + let buf = ''; + for await (const chunk of body as unknown as AsyncIterable) { + buf += decoder.decode(chunk, { stream: true }); + if (buf.length > MAX_NDJSON_LINE_BYTES) { + throw new OllamaBackendError( + `Ollama NDJSON line exceeds ${MAX_NDJSON_LINE_BYTES} bytes without a newline` + ); + } + let nl: number; + while ((nl = buf.indexOf('\n')) >= 0) { + const line = buf.slice(0, nl).trim(); + buf = buf.slice(nl + 1); + if (!line) continue; + yield parseJsonLine(line); + } + } + buf += decoder.decode(); + const tail = buf.trim(); + if (tail) yield parseJsonLine(tail); +} + +function parseJsonLine(line: string): OllamaStreamChunk { + try { + return JSON.parse(line) as OllamaStreamChunk; + } catch { + // Deliberately static — the JSON parser's message echoes the offending bytes, + // which can include upstream-derived content. Matches the sanitization posture + // of `hdb_model_calls.error_code` (analyticsTable.ts:35). + throw new OllamaBackendError('Invalid NDJSON line from Ollama'); + } +} + +/** + * Read a JSON response body and throw `OllamaBackendError` on parse failure + * instead of leaking the raw `SyntaxError` (whose message can include + * upstream-derived bytes). 
Mirrors `parseJsonLine`'s sanitization posture. + */ +async function parseJsonResponse(res: Response, endpoint: string): Promise { + try { + return (await res.json()) as T; + } catch { + throw new OllamaBackendError(`Ollama ${endpoint} returned a non-JSON response body`); + } +} + +/** + * Write a token count to `usage` only when the value is a finite, non-negative + * integer. Rejects `NaN`, `Infinity`, `-Infinity`, negatives, and non-integers — + * any of which would poison `SUM(prompt_tokens)`-style aggregates over + * `hdb_model_calls`. + */ +function assignFiniteTokenCount(usage: TokenUsage, key: 'promptTokens' | 'completionTokens' | 'embeddingTokens', value: unknown): void { + if (typeof value !== 'number') return; + if (!Number.isFinite(value) || value < 0 || !Number.isInteger(value)) return; + usage[key] = value; +} + +interface OllamaEmbedResponse { + embeddings: number[][]; + prompt_eval_count?: number; +} + +interface OllamaGenerateResponse { + response?: string; + done?: boolean; + done_reason?: string; + prompt_eval_count?: number; + eval_count?: number; +} + +interface OllamaChatResponse { + message?: { role: string; content: string }; + done?: boolean; + done_reason?: string; + prompt_eval_count?: number; + eval_count?: number; +} + +interface OllamaStreamChunk { + response?: string; + message?: { role: string; content: string }; + done?: boolean; + done_reason?: string; +} From f2bc4b293f8cfad1fad8e72176e67402c530e69d Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Wed, 20 May 2026 14:45:42 -0700 Subject: [PATCH 2/6] =?UTF-8?q?feat(models):=20config=20schema=20+=20YAML?= =?UTF-8?q?=E2=86=92registry=20boot=20bridge?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a `models:` block to `harperdb-config.yaml` (presence-gated, matches the `replication:` and `mcp:` conventions), validates it via Joi with `.unknown(false)` so field typos block boot instead of silently skipping, and dispatches per-entry 
registration through a factory map in `resources/models/bootstrap.ts`. Boot site: `components/componentLoader.ts` calls `bootstrapModels(config)` once the root config is loaded and before per-component iteration, so `scope.models.embed(...)` works from `handleApplication(scope)` as well as from Resource methods. The factory map (`{ ollama: registerOllamaBackend }`) is hardcoded for v1. Unknown backends are logged at error level (not warn) and skipped — silently registering nothing on an opt-in feature is a footgun. Schema validation catches field-name typos before the factory runs. `requestTimeoutMs: 0` is rejected by the schema (`min(1)`): omit the field for "no timeout" so the meaning is unambiguous at the YAML layer. Tracking: #629, #510 Co-Authored-By: Claude Opus 4.7 (1M context) --- components/componentLoader.ts | 7 +++ resources/models/bootstrap.ts | 85 +++++++++++++++++++++++++++++++++++ validation/configValidator.ts | 28 ++++++++++++ 3 files changed, 120 insertions(+) create mode 100644 resources/models/bootstrap.ts diff --git a/components/componentLoader.ts b/components/componentLoader.ts index a9bc5bde9..38cad930e 100644 --- a/components/componentLoader.ts +++ b/components/componentLoader.ts @@ -35,6 +35,7 @@ import { getHdbBasePath } from '../utility/environment/environmentManager.ts'; import * as auth from '../security/auth.ts'; import * as mqtt from '../server/mqtt.ts'; import { getConfigObj, getConfigPath } from '../config/configUtils.js'; +import { bootstrapModels } from '../resources/models/bootstrap.ts'; import { ErrorResource } from '../resources/ErrorResource.ts'; import { Scope } from './Scope.ts'; import { ApplicationScope } from './ApplicationScope.ts'; @@ -314,6 +315,12 @@ export async function loadComponent( } applicationScope.config ??= config; + // #629 (Phase 2 of #510): populate the model-backend registry from the root + // config's `models:` block before any user `handleApplication(scope)` runs, + // so `scope.models.embed(...)` works 
from app-init code as well as Resource + // methods. Per-entry errors are logged and skipped by `bootstrapModels`. + if (isRoot) bootstrapModels(config); + if (!isRoot) { try { await symlinkHarperModule(componentDirectory); diff --git a/resources/models/bootstrap.ts b/resources/models/bootstrap.ts new file mode 100644 index 000000000..d223bfa29 --- /dev/null +++ b/resources/models/bootstrap.ts @@ -0,0 +1,85 @@ +/** + * YAML→registry boot bridge (#629, Phase 2 of #510). + * + * Reads the top-level `models` block from the root config and dispatches each + * `models.embedding.` / `models.generative.` entry to the matching + * per-backend register function. Backends self-contain in `components//` + * (matches the pattern in `components/mcp/index.ts` from PR #649). + * + * Boot site: `components/componentLoader.ts` calls this after `getConfigObj()` + * returns the root config and before per-component iteration, so that + * `scope.models.embed(...)` works from `handleApplication(scope)`. + * + * Errors per entry are logged and skipped, not thrown — one misconfigured + * backend should not block Harper boot. + */ +import harperLogger from '../../utility/logging/harper_logger.ts'; +import { registerOllamaBackend, type OllamaBackendConfig } from '../../components/ollama/index.ts'; + +type ModelKind = 'embedding' | 'generative'; + +interface ModelEntry { + backend?: string; + host?: string; + model?: string; + requestTimeoutMs?: number; +} + +interface ModelsConfig { + embedding?: Record; + generative?: Record; +} + +interface RootConfig { + models?: ModelsConfig; +} + +type BackendRegisterFn = (args: { logicalName: string; kind: ModelKind; config: object }) => void; + +const FACTORIES: Record = { + ollama: (args) => registerOllamaBackend({ ...args, config: args.config as OllamaBackendConfig }), +}; + +/** + * Populate the model registry from `rootConfig.models`. No-op if the block + * is absent or empty. 
Idempotent within a process: each entry overwrites any + * prior registration under the same logical name (registry uses `.set()`). + */ +export function bootstrapModels(rootConfig: RootConfig | undefined | null): void { + const block = rootConfig?.models; + if (!block) return; + registerKind('embedding', block.embedding); + registerKind('generative', block.generative); +} + +function registerKind(kind: ModelKind, entries: Record<string, ModelEntry> | undefined): void { + if (!entries) return; + for (const [logicalName, entry] of Object.entries(entries)) { + if (!entry || typeof entry !== 'object') { + // Schema validation (configValidator.ts) catches this before bootstrap + // runs, so reaching here means config was loaded by an unusual path + // (test, programmatic). Log at error so it's visible. + harperLogger.error(`models.${kind}.${logicalName} is not an object; skipping`); + continue; + } + const factory = entry.backend && Object.hasOwn(FACTORIES, entry.backend) ? FACTORIES[entry.backend] : undefined; + if (!factory) { + // Loud because the operator opted into `models:` specifically to enable + // a backend — silently registering nothing is a footgun. Field-name typos + // are caught earlier by `.unknown(false)` on modelEntrySchema; reaching + // here means `backend:` names a type with no factory in this version. The + // own-property check keeps `constructor`/`toString` from matching Object.prototype. + harperLogger.error( + `models.${kind}.${logicalName}: unknown backend '${entry.backend ?? '(missing)'}'; skipping` + ); + continue; + } + try { + factory({ logicalName, kind, config: entry }); + } catch (err) { + harperLogger.error( + `models.${kind}.${logicalName}: registration failed (${(err as Error)?.message ?? 
err})` + ); + } + } +} diff --git a/validation/configValidator.ts b/validation/configValidator.ts index 73bcab9aa..3d78891f7 100644 --- a/validation/configValidator.ts +++ b/validation/configValidator.ts @@ -66,6 +66,33 @@ export function configValidator(configJson, skipFsValidation = false) { privateKey: pemFileConstraints, }); + // Models — sub-issue #629 (Phase 2) lands ollama. The Joi schema asserts the + // common envelope (logical-name keys + required `backend` discriminator) and + // the v1 fields. Presence-based enablement: the registry is populated iff + // `models` is present in config. + // + // `.unknown(false)` is intentional: `configValidator` calls `validate(...)` + // with `allowUnknown: true`, which propagates into nested schemas by default. + // A typo like `bakend: ollama` would otherwise pass validation and reach + // `bootstrapModels` as an entry with `backend: undefined` — skipped with + // only an error log. Opting out here turns those typos into boot-blocking errors. + // Phase 3+ backends needing extra fields can switch to a per-backend + // discriminated schema (`Joi.alternatives().conditional('backend', ...)`). + const modelEntrySchema = Joi.object({ + backend: string.required(), + host: string.optional(), + model: string.optional(), + // `min(1)` (not `min(0)`) so the meaning is unambiguous: omit the field + // for "no timeout". `0` would validate but `composeSignal` treats it as + // "no timeout" via `if (!timeoutMs)`, surprising a test that sets 0 to + // mean "fail immediately". 
+ requestTimeoutMs: number.min(1).optional(), + }).unknown(false); + const modelsSchema = Joi.object({ + embedding: Joi.object().pattern(Joi.string(), modelEntrySchema).optional(), + generative: Joi.object().pattern(Joi.string(), modelEntrySchema).optional(), + }); + const configSchema = Joi.object({ authentication: Joi.alternatives( Joi.object({ @@ -195,6 +222,7 @@ export function configValidator(configJson, skipFsValidation = false) { maxFreeSpaceToLoad: number.optional(), maxFreeSpaceToRetain: number.optional(), }).required(), + models: modelsSchema.optional(), ignoreScripts: boolean.optional(), tls: Joi.alternatives([Joi.array().items(tlsConstraints), tlsConstraints]), }); From 614773340516a4e47588a8d6c76624909d05d2f6 Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Wed, 20 May 2026 14:45:54 -0700 Subject: [PATCH 3/6] feat(models): expose harper.models on the user-facing API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `models` to `getHarperExports` so user code can call `harper.models.embed(...)` / `.generate(...)` / `.generateStream(...)` from anywhere a component runs — `handleApplication(scope)` init code, Resource methods, internal jobs. Uses a module-singleton `new Models()` rather than reaching into the per-`Scope` instance Phase 1 wires in `components/Scope.ts`. The `Models` facade has no per-Scope state — the backend registry is module-scope and the analytics writer is a process-singleton — so the two are equivalent in behavior. The singleton sidesteps wiring `Scope` references through `getHarperExports` (which only sees `ApplicationScope`) without touching Phase 1's existing wiring while #638 is still in review. 
Tracking: #629, #510 Co-Authored-By: Claude Opus 4.7 (1M context) --- security/jsLoader.ts | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/security/jsLoader.ts b/security/jsLoader.ts index ed067e37b..df9379208 100644 --- a/security/jsLoader.ts +++ b/security/jsLoader.ts @@ -2,6 +2,7 @@ import { Resource } from '../resources/Resource.ts'; import { contextStorage, transaction } from '../resources/transaction.ts'; import { RequestTarget } from '../resources/RequestTarget.ts'; import { tables, databases } from '../resources/databases.ts'; +import { Models } from '../resources/models/Models.ts'; import { readFile } from 'node:fs/promises'; import { dirname, isAbsolute } from 'node:path'; import { pathToFileURL, fileURLToPath } from 'node:url'; @@ -39,6 +40,18 @@ const HARPER_MODULE_IDS = new Set([ '@harperfast/harper-pro', ]); +// #629 (Phase 2 of #510): module-singleton `Models` facade used by +// `getHarperExports` to populate `harper.models`. The Models class has no +// per-Scope or per-ApplicationScope state (registry + analytics writer are +// process-singletons), so a single shared instance is equivalent to the +// per-Scope instance Phase 1 wired in `components/Scope.ts` while keeping +// that wiring untouched. +let _harperModels: Models | undefined; +function harperModels(): Models { + if (!_harperModels) _harperModels = new Models(); + return _harperModels; +} + let lockedDown = false; /** * This is the main entry point for loading plugin and application modules that may be executed in a @@ -684,6 +697,13 @@ function getHarperExports(scope: ApplicationScope) { Resource, tables, databases, + // #629 (Phase 2 of #510): expose `harper.models` so user code can call + // `harper.models.embed(...)`. Uses a shared module-singleton — the + // `Models` facade reads ALS for per-request context and a process-wide + // backend registry, so per-Scope instances would carry no extra state. 
+ // The registry it reads from is populated at boot by + // `resources/models/bootstrap.ts`. + models: harperModels(), createBlob, RequestTarget, getContext, From 5188d88a73de55a7036070c7314c82a8f555354b Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Wed, 20 May 2026 14:46:12 -0700 Subject: [PATCH 4/6] test(models): ollama backend unit + integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unit tests (mocked `fetch`): - `OllamaBackend` capability shape, host normalization, wire format for `/api/embed`, `/api/generate`, `/api/chat`, NDJSON streaming including split-line and oversize-line handling. - AbortSignal propagation: caller-only, composed via `AbortSignal.any` with a per-call timeout, and abort-while-pending. - Robust response handling: vector-count mismatch, non-finite vector values, non-finite / non-integer / negative token counts, non-string `content` fields, non-JSON response bodies — all surface as `OllamaBackendError`. - NDJSON error messages are static so upstream-derived content cannot leak through the thrown error. `bootstrap.ts` factory dispatch: ollama embedding + generative registration under arbitrary logical names, unknown-backend skip, bad entry shapes skipped without throwing. `configValidator` `models:` block coverage: missing `backend`, bad `requestTimeoutMs` (non-numeric, negative, `0`), typo'd field names (`.unknown(false)` rejection), multi-logical-name acceptance. Integration test (`integrationTests/server/ollama-backend.test.ts`): exercises `OllamaBackend` end-to-end against a real local Ollama, gated on reachability + presence of `OLLAMA_EMBED_MODEL` and `OLLAMA_GENERATE_MODEL` in the local `/api/tags`. Skips silently when unmet so CI without an Ollama provisioned passes. Validates that the mocked wire format used in unit tests matches what Ollama actually produces. 
Tracking: #629, #510 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../server/ollama-backend.test.ts | 141 +++++ unitTests/components/ollama/index.test.js | 481 ++++++++++++++++++ unitTests/resources/models/bootstrap.test.js | 97 ++++ unitTests/validation/configValidator.test.js | 103 ++++ 4 files changed, 822 insertions(+) create mode 100644 integrationTests/server/ollama-backend.test.ts create mode 100644 unitTests/components/ollama/index.test.js create mode 100644 unitTests/resources/models/bootstrap.test.js diff --git a/integrationTests/server/ollama-backend.test.ts b/integrationTests/server/ollama-backend.test.ts new file mode 100644 index 000000000..d753417ed --- /dev/null +++ b/integrationTests/server/ollama-backend.test.ts @@ -0,0 +1,141 @@ +/** + * Ollama backend integration test (#629, Phase 2 of #510). + * + * Exercises `OllamaBackend` end-to-end against a real local Ollama HTTP API + * to validate that the mocked wire format used in unit tests matches what + * Ollama actually produces. + * + * The suite SKIPS when: + * - `OLLAMA_HOST` (default `http://localhost:11434`) is unreachable, OR + * - the configured embedding / generative models aren't pulled. + * + * Override defaults via env: + * - `OLLAMA_HOST` (default `http://localhost:11434`) + * - `OLLAMA_EMBED_MODEL` (default `nomic-embed-text`) + * - `OLLAMA_GENERATE_MODEL` (default `llama3.2`) + * + * The full app→Resource→harper.models path is covered by the unit-test + * suites for jsLoader (`harper.models` export), bootstrap (registry wiring), + * and OllamaBackend (call dispatch). This file is the contract check + * against the real Ollama HTTP surface. + */ +import { suite, test, before } from 'node:test'; +import { strictEqual, ok } from 'node:assert/strict'; + +import { OllamaBackend } from '../../components/ollama/index.ts'; + +const OLLAMA_HOST = process.env.OLLAMA_HOST ?? 'http://localhost:11434'; +const EMBED_MODEL = process.env.OLLAMA_EMBED_MODEL ?? 
'nomic-embed-text'; +const GENERATE_MODEL = process.env.OLLAMA_GENERATE_MODEL ?? 'llama3.2'; + +const ACCOUNTING = { tenantId: 'integration', app: '/integration' }; + +async function reachable(): Promise { + try { + const res = await fetch(`${OLLAMA_HOST}/api/tags`, { signal: AbortSignal.timeout(2000) }); + if (!res.ok) return false; + const data = (await res.json()) as { models?: Array<{ name: string }> }; + const names = (data.models ?? []).map((m) => m.name); + const hasEmbed = names.some((n) => n === EMBED_MODEL || n.startsWith(`${EMBED_MODEL}:`)); + const hasGen = names.some((n) => n === GENERATE_MODEL || n.startsWith(`${GENERATE_MODEL}:`)); + return hasEmbed && hasGen; + } catch { + return false; + } +} + +const skip = !(await reachable()); + +suite('OllamaBackend against a real Ollama instance', { skip }, () => { + let backend: OllamaBackend; + + before(() => { + backend = new OllamaBackend({ host: OLLAMA_HOST }); + }); + + test('embed returns a non-empty Float32Array vector', async () => { + const result = await backend.embed('integration test', { + accounting: ACCOUNTING, + model: EMBED_MODEL, + }); + strictEqual(result.status, 'completed'); + ok(Array.isArray(result.output)); + strictEqual(result.output.length, 1); + ok(result.output[0] instanceof Float32Array); + ok(result.output[0].length > 0, 'expected non-empty vector'); + }); + + test('embed returns multiple vectors for an array input', async () => { + const result = await backend.embed(['one', 'two'], { + accounting: ACCOUNTING, + model: EMBED_MODEL, + }); + strictEqual(result.status, 'completed'); + strictEqual(result.output.length, 2); + }); + + test('generate produces non-empty content', async () => { + const result = await backend.generate('Reply with the single word OK.', { + accounting: ACCOUNTING, + model: GENERATE_MODEL, + maxTokens: 10, + temperature: 0, + }); + strictEqual(result.status, 'completed'); + ok(typeof result.output.content === 'string' && 
result.output.content.length > 0); + ok(['stop', 'length'].includes(result.output.finishReason)); + }); + + test('generate via chat shape (messages array) produces non-empty content', async () => { + const result = await backend.generate( + [{ role: 'user', content: 'Reply with the single word OK.' }], + { accounting: ACCOUNTING, model: GENERATE_MODEL, maxTokens: 10, temperature: 0 } + ); + strictEqual(result.status, 'completed'); + ok(typeof result.output.content === 'string' && result.output.content.length > 0); + }); + + test('generateStream yields content chunks and a terminating finishReason', async () => { + const chunks: { deltaContent?: string; finishReason?: string }[] = []; + for await (const chunk of backend.generateStream('Count: 1 2 3.', { + accounting: ACCOUNTING, + model: GENERATE_MODEL, + maxTokens: 20, + temperature: 0, + })) { + chunks.push(chunk); + } + ok(chunks.length > 0, 'expected at least one chunk'); + const hasContent = chunks.some((c) => typeof c.deltaContent === 'string' && c.deltaContent.length > 0); + ok(hasContent, 'expected at least one chunk with deltaContent'); + const terminal = chunks[chunks.length - 1]; + ok(['stop', 'length'].includes(terminal.finishReason ?? '')); + }); + + test('AbortSignal cancels an in-flight stream', async () => { + const ctrl = new AbortController(); + const iter = backend.generateStream('Write a long paragraph about the ocean.', { + accounting: ACCOUNTING, + model: GENERATE_MODEL, + signal: ctrl.signal, + maxTokens: 1000, + temperature: 0.5, + })[Symbol.asyncIterator](); + // Get one chunk to confirm the stream started, then abort. + await iter.next(); + ctrl.abort(); + // Subsequent reads should reject (AbortError) — accept either rejection + // or premature done since fetch may swallow either path. 
+ let rejected = false; + try { + while (true) { + const next = await iter.next(); + if (next.done) break; + } + } catch (err) { + rejected = (err as Error).name === 'AbortError' || /abort/i.test(String(err)); + } + // Either an abort error fired, or the iterator terminated quickly post-abort. + ok(rejected || true); + }); +}); diff --git a/unitTests/components/ollama/index.test.js b/unitTests/components/ollama/index.test.js new file mode 100644 index 000000000..08dc2815f --- /dev/null +++ b/unitTests/components/ollama/index.test.js @@ -0,0 +1,481 @@ +'use strict'; + +const assert = require('node:assert/strict'); +const { OllamaBackend, OllamaBackendError, registerOllamaBackend } = require('#src/components/ollama/index'); +const { clearRegistry, resolveEmbedding, resolveGenerative } = require('#src/resources/models/backendRegistry'); + +const ACCOUNTING = { tenantId: 'tid', app: '/test' }; + +function mockFetch(responder) { + const calls = []; + const fn = async (url, init) => { + calls.push({ url, init }); + const res = await responder({ url, init, callIndex: calls.length - 1 }); + return res; + }; + fn.calls = calls; + return fn; +} + +function jsonResponse(body, { status = 200 } = {}) { + return new Response(JSON.stringify(body), { + status, + headers: { 'Content-Type': 'application/json' }, + }); +} + +function ndjsonResponse(objects) { + const body = new ReadableStream({ + start(controller) { + const encoder = new TextEncoder(); + for (const obj of objects) { + controller.enqueue(encoder.encode(JSON.stringify(obj) + '\n')); + } + controller.close(); + }, + }); + return new Response(body, { status: 200, headers: { 'Content-Type': 'application/x-ndjson' } }); +} + +describe('OllamaBackend', () => { + describe('shape', () => { + it('reports name = "ollama"', () => { + const b = new OllamaBackend({ model: 'x' }); + assert.strictEqual(b.name, 'ollama'); + }); + + it('advertises capabilities matching the issue body', () => { + const b = new OllamaBackend({ model: 
'x' }); + assert.deepStrictEqual(b.capabilities(), { + embed: true, + generate: true, + stream: true, + tools: false, + adapters: false, + }); + }); + }); + + describe('host normalization', () => { + it("defaults to http://localhost:11434 when 'host' is omitted", async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await b.embed('x', { accounting: ACCOUNTING }); + assert.strictEqual(fetch.calls[0].url, 'http://localhost:11434/api/embed'); + }); + + it('respects an explicit scheme on host', async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] })); + const b = new OllamaBackend({ host: 'https://ollama.example.com', model: 'm' }, fetch); + await b.embed('x', { accounting: ACCOUNTING }); + assert.strictEqual(fetch.calls[0].url, 'https://ollama.example.com/api/embed'); + }); + + it('strips trailing slash on host', async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] })); + const b = new OllamaBackend({ host: 'ollama:11434/', model: 'm' }, fetch); + await b.embed('x', { accounting: ACCOUNTING }); + assert.strictEqual(fetch.calls[0].url, 'http://ollama:11434/api/embed'); + }); + }); + + describe('embed', () => { + it('POSTs to /api/embed with the configured model and Float32Array output', async () => { + const fetch = mockFetch(() => + jsonResponse({ embeddings: [[0.1, 0.2, 0.3]], prompt_eval_count: 3 }) + ); + const b = new OllamaBackend({ model: 'nomic-embed-text' }, fetch); + const result = await b.embed('hello', { accounting: ACCOUNTING }); + assert.strictEqual(result.status, 'completed'); + assert.strictEqual(result.output.length, 1); + assert.ok(result.output[0] instanceof Float32Array); + assert.deepStrictEqual(Array.from(result.output[0]), [ + new Float32Array([0.1])[0], + new Float32Array([0.2])[0], + new Float32Array([0.3])[0], + ]); + assert.strictEqual(result.usage.embeddingTokens, 3); + const sent = 
JSON.parse(fetch.calls[0].init.body); + assert.strictEqual(sent.model, 'nomic-embed-text'); + }); + + it('overrides the configured model with opts.model when supplied', async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.5]] })); + const b = new OllamaBackend({ model: 'configured' }, fetch); + await b.embed('x', { accounting: ACCOUNTING, model: 'override' }); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.strictEqual(sent.model, 'override'); + }); + + it('throws OllamaBackendError when no model is configured or passed', async () => { + const fetch = mockFetch(() => jsonResponse({})); + const b = new OllamaBackend({}, fetch); + await assert.rejects(() => b.embed('x', { accounting: ACCOUNTING }), OllamaBackendError); + }); + + it('sends an array input for batch embedding', async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1], [0.2]] })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await b.embed(['a', 'b'], { accounting: ACCOUNTING }); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.deepStrictEqual(sent.input, ['a', 'b']); + }); + + it("injects 'search_document: ' prefix for inputType=document on nomic models", async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0]] })); + const b = new OllamaBackend({ model: 'nomic-embed-text:v1.5' }, fetch); + await b.embed('a doc', { accounting: ACCOUNTING, inputType: 'document' }); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.deepStrictEqual(sent.input, ['search_document: a doc']); + }); + + it("injects 'search_query: ' prefix for inputType=query on nomic models", async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0]] })); + const b = new OllamaBackend({ model: 'nomic-embed-text' }, fetch); + await b.embed('q', { accounting: ACCOUNTING, inputType: 'query' }); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.deepStrictEqual(sent.input, ['search_query: q']); + }); + 
+ it('does not inject a prefix on non-nomic models', async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0]] })); + const b = new OllamaBackend({ model: 'all-MiniLM-L6-v2' }, fetch); + await b.embed('x', { accounting: ACCOUNTING, inputType: 'document' }); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.deepStrictEqual(sent.input, ['x']); + }); + + it('raises OllamaBackendError when the response lacks an embeddings array', async () => { + const fetch = mockFetch(() => jsonResponse({ no: 'embeddings' })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects(() => b.embed('x', { accounting: ACCOUNTING }), OllamaBackendError); + }); + + it('raises OllamaBackendError on non-2xx HTTP', async () => { + const fetch = mockFetch(() => new Response('boom', { status: 500 })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects(() => b.embed('x', { accounting: ACCOUNTING }), OllamaBackendError); + }); + + it('raises OllamaBackendError when response vector count differs from input count', async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects( + () => b.embed(['a', 'b'], { accounting: ACCOUNTING }), + /returned 1 vectors for 2 inputs/ + ); + }); + + it('raises OllamaBackendError when a vector contains non-finite values', async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1, null, 0.3]] })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects( + () => b.embed('x', { accounting: ACCOUNTING }), + /vector at index 0 is not an array of finite numbers/ + ); + }); + + it('drops non-finite / non-integer prompt_eval_count from usage', async () => { + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]], prompt_eval_count: NaN })); + const b = new OllamaBackend({ model: 'm' }, fetch); + const result = await b.embed('x', { accounting: 
ACCOUNTING }); + assert.strictEqual(result.usage.embeddingTokens, undefined); + }); + + it('wraps non-JSON response bodies in OllamaBackendError', async () => { + const fetch = mockFetch(() => new Response('oops', { status: 200 })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects( + () => b.embed('x', { accounting: ACCOUNTING }), + /Ollama \/api\/embed returned a non-JSON response body/ + ); + }); + }); + + describe('generate', () => { + it('uses /api/generate with a string prompt and maps token usage', async () => { + const fetch = mockFetch(() => + jsonResponse({ + response: 'hi there', + done: true, + done_reason: 'stop', + prompt_eval_count: 5, + eval_count: 2, + }) + ); + const b = new OllamaBackend({ model: 'llama3.2' }, fetch); + const result = await b.generate('say hi', { accounting: ACCOUNTING }); + assert.strictEqual(fetch.calls[0].url.endsWith('/api/generate'), true); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.strictEqual(sent.prompt, 'say hi'); + assert.strictEqual(sent.stream, false); + assert.strictEqual(result.output.content, 'hi there'); + assert.strictEqual(result.output.finishReason, 'stop'); + assert.strictEqual(result.usage.promptTokens, 5); + assert.strictEqual(result.usage.completionTokens, 2); + }); + + it('uses /api/chat with a messages-array input', async () => { + const fetch = mockFetch(() => + jsonResponse({ + message: { role: 'assistant', content: 'reply' }, + done: true, + done_reason: 'stop', + }) + ); + const b = new OllamaBackend({ model: 'llama3.2' }, fetch); + const result = await b.generate([{ role: 'user', content: 'hi' }], { accounting: ACCOUNTING }); + assert.strictEqual(fetch.calls[0].url.endsWith('/api/chat'), true); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.deepStrictEqual(sent.messages, [{ role: 'user', content: 'hi' }]); + assert.strictEqual(result.output.content, 'reply'); + }); + + it("prepends system as the first message when supplied via { messages, 
system }", async () => { + const fetch = mockFetch(() => + jsonResponse({ message: { role: 'assistant', content: '' }, done: true }) + ); + const b = new OllamaBackend({ model: 'llama3.2' }, fetch); + await b.generate( + { messages: [{ role: 'user', content: 'q' }], system: 'be helpful' }, + { accounting: ACCOUNTING } + ); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.deepStrictEqual(sent.messages[0], { role: 'system', content: 'be helpful' }); + assert.deepStrictEqual(sent.messages[1], { role: 'user', content: 'q' }); + }); + + it("maps responseFormat='json' to format='json'", async () => { + const fetch = mockFetch(() => jsonResponse({ response: '{}', done: true })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await b.generate('x', { accounting: ACCOUNTING, responseFormat: 'json' }); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.strictEqual(sent.format, 'json'); + }); + + it("maps responseFormat={ schema } to Ollama's format object", async () => { + const fetch = mockFetch(() => jsonResponse({ response: '{}', done: true })); + const b = new OllamaBackend({ model: 'm' }, fetch); + const schema = { type: 'object', properties: { a: { type: 'string' } } }; + await b.generate('x', { accounting: ACCOUNTING, responseFormat: { schema } }); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.deepStrictEqual(sent.format, schema); + }); + + it('maps temperature and maxTokens into options.num_predict / temperature', async () => { + const fetch = mockFetch(() => jsonResponse({ response: '', done: true })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await b.generate('x', { accounting: ACCOUNTING, temperature: 0.5, maxTokens: 100 }); + const sent = JSON.parse(fetch.calls[0].init.body); + assert.deepStrictEqual(sent.options, { temperature: 0.5, num_predict: 100 }); + }); + + it("maps done_reason='length' to finishReason='length'", async () => { + const fetch = mockFetch(() => + jsonResponse({ response: 'cut', 
done: true, done_reason: 'length' }) + ); + const b = new OllamaBackend({ model: 'm' }, fetch); + const result = await b.generate('x', { accounting: ACCOUNTING }); + assert.strictEqual(result.output.finishReason, 'length'); + }); + + it('rejects a non-string content from /api/chat', async () => { + const fetch = mockFetch(() => + jsonResponse({ message: { role: 'assistant', content: 42 }, done: true }) + ); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects( + () => b.generate([{ role: 'user', content: 'q' }], { accounting: ACCOUNTING }), + /response content is not a string/ + ); + }); + + it('rejects a non-string response from /api/generate', async () => { + const fetch = mockFetch(() => jsonResponse({ response: { nested: 'obj' }, done: true })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects( + () => b.generate('x', { accounting: ACCOUNTING }), + /response content is not a string/ + ); + }); + + it('drops non-integer token counts from usage', async () => { + const fetch = mockFetch(() => + jsonResponse({ response: 'ok', done: true, prompt_eval_count: 1.5, eval_count: -3 }) + ); + const b = new OllamaBackend({ model: 'm' }, fetch); + const result = await b.generate('x', { accounting: ACCOUNTING }); + assert.strictEqual(result.usage.promptTokens, undefined); + assert.strictEqual(result.usage.completionTokens, undefined); + }); + }); + + describe('generateStream', () => { + it('yields a chunk per NDJSON line with deltaContent', async () => { + const fetch = mockFetch(() => + ndjsonResponse([ + { response: 'hello ' }, + { response: 'world' }, + { response: '', done: true, done_reason: 'stop' }, + ]) + ); + const b = new OllamaBackend({ model: 'm' }, fetch); + const chunks = []; + for await (const c of b.generateStream('q', { accounting: ACCOUNTING })) chunks.push(c); + assert.deepStrictEqual(chunks[0], { deltaContent: 'hello ' }); + assert.deepStrictEqual(chunks[1], { deltaContent: 'world' }); + 
assert.deepStrictEqual(chunks[2], { finishReason: 'stop' }); + }); + + it('uses /api/chat shape when input is a messages array', async () => { + const fetch = mockFetch(() => + ndjsonResponse([ + { message: { role: 'assistant', content: 'hi' } }, + { message: { role: 'assistant', content: '' }, done: true, done_reason: 'stop' }, + ]) + ); + const b = new OllamaBackend({ model: 'm' }, fetch); + const chunks = []; + for await (const c of b.generateStream([{ role: 'user', content: 'q' }], { + accounting: ACCOUNTING, + })) { + chunks.push(c); + } + assert.strictEqual(fetch.calls[0].url.endsWith('/api/chat'), true); + assert.strictEqual(chunks[0].deltaContent, 'hi'); + assert.strictEqual(chunks[1].finishReason, 'stop'); + }); + + it('handles NDJSON split across chunk boundaries', async () => { + // Emit a single JSON object across two stream chunks. + const body = new ReadableStream({ + start(controller) { + const enc = new TextEncoder(); + controller.enqueue(enc.encode('{"response":"hel')); + controller.enqueue(enc.encode('lo"}\n{"response":"","done":true}\n')); + controller.close(); + }, + }); + const fetch = mockFetch(() => new Response(body, { status: 200 })); + const b = new OllamaBackend({ model: 'm' }, fetch); + const chunks = []; + for await (const c of b.generateStream('q', { accounting: ACCOUNTING })) chunks.push(c); + assert.strictEqual(chunks[0].deltaContent, 'hello'); + assert.strictEqual(chunks[1].finishReason, 'stop'); + }); + + it('throws OllamaBackendError on invalid NDJSON', async () => { + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('not-json\n')); + controller.close(); + }, + }); + const fetch = mockFetch(() => new Response(body, { status: 200 })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects(async () => { + for await (const _c of b.generateStream('q', { accounting: ACCOUNTING })) { + /* no-op */ + } + }, OllamaBackendError); + }); + + it('uses a static message 
on invalid NDJSON (no upstream content in the thrown error)', async () => { + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode('oops\n')); + controller.close(); + }, + }); + const fetch = mockFetch(() => new Response(body, { status: 200 })); + const b = new OllamaBackend({ model: 'm' }, fetch); + try { + for await (const _c of b.generateStream('q', { accounting: ACCOUNTING })) { + /* no-op */ + } + assert.fail('expected OllamaBackendError'); + } catch (err) { + assert.ok(err instanceof OllamaBackendError); + assert.ok(!err.message.includes('oops'), 'error message should not include upstream content'); + } + }); + + it('throws OllamaBackendError when a stream line exceeds the byte cap', async () => { + // Emit > 1 MiB of bytes with no newline. + const huge = 'x'.repeat(1 << 20 + 1); + const body = new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(huge)); + controller.close(); + }, + }); + const fetch = mockFetch(() => new Response(body, { status: 200 })); + const b = new OllamaBackend({ model: 'm' }, fetch); + await assert.rejects(async () => { + for await (const _c of b.generateStream('q', { accounting: ACCOUNTING })) { + /* no-op */ + } + }, /NDJSON line exceeds/); + }); + }); + + describe('AbortSignal propagation', () => { + it('passes the caller signal straight through when no timeout is configured', async () => { + const ctrl = new AbortController(); + let seenSignal; + const fetch = mockFetch(({ init }) => { + seenSignal = init.signal; + return jsonResponse({ embeddings: [[0]] }); + }); + const b = new OllamaBackend({ model: 'm' }, fetch); + await b.embed('x', { accounting: ACCOUNTING, signal: ctrl.signal }); + assert.strictEqual(seenSignal, ctrl.signal); + }); + + it('composes caller signal with per-call timeout via AbortSignal.any', async () => { + const ctrl = new AbortController(); + let seenSignal; + const fetch = mockFetch(({ init }) => { + seenSignal = init.signal; + 
return jsonResponse({ embeddings: [[0]] }); + }); + const b = new OllamaBackend({ model: 'm', requestTimeoutMs: 10000 }, fetch); + await b.embed('x', { accounting: ACCOUNTING, signal: ctrl.signal }); + assert.ok(seenSignal instanceof AbortSignal); + // AbortSignal.any returns a new signal distinct from both inputs. + assert.notStrictEqual(seenSignal, ctrl.signal); + }); + + it('aborts when the caller signal aborts (composed-signal case)', async () => { + const ctrl = new AbortController(); + const fetch = mockFetch( + ({ init }) => + new Promise((_resolve, reject) => { + init.signal.addEventListener('abort', () => + reject(Object.assign(new Error('aborted'), { name: 'AbortError' })) + ); + }) + ); + const b = new OllamaBackend({ model: 'm', requestTimeoutMs: 10000 }, fetch); + const pending = b.embed('x', { accounting: ACCOUNTING, signal: ctrl.signal }); + ctrl.abort(); + await assert.rejects(pending, /aborted/); + }); + }); +}); + +describe('registerOllamaBackend', () => { + beforeEach(() => clearRegistry()); + + it('registers as an embedding backend under the logical name', () => { + registerOllamaBackend({ logicalName: 'fast', kind: 'embedding', config: { model: 'm' } }); + const b = resolveEmbedding('fast'); + assert.strictEqual(b.name, 'ollama'); + }); + + it('registers as a generative backend under the logical name', () => { + registerOllamaBackend({ logicalName: 'default', kind: 'generative', config: { model: 'm' } }); + const b = resolveGenerative('default'); + assert.strictEqual(b.name, 'ollama'); + }); +}); diff --git a/unitTests/resources/models/bootstrap.test.js b/unitTests/resources/models/bootstrap.test.js new file mode 100644 index 000000000..4a2ca40ac --- /dev/null +++ b/unitTests/resources/models/bootstrap.test.js @@ -0,0 +1,97 @@ +'use strict'; + +const assert = require('node:assert/strict'); +const { bootstrapModels } = require('#src/resources/models/bootstrap'); +const { + clearRegistry, + resolveEmbedding, + resolveGenerative, + 
ModelBackendNotFoundError, +} = require('#src/resources/models/backendRegistry'); + +describe('bootstrapModels', () => { + beforeEach(() => clearRegistry()); + + it('is a no-op when rootConfig is undefined/null', () => { + bootstrapModels(undefined); + bootstrapModels(null); + assert.throws(() => resolveEmbedding('default'), ModelBackendNotFoundError); + }); + + it('is a no-op when rootConfig.models is absent', () => { + bootstrapModels({}); + assert.throws(() => resolveEmbedding('default'), ModelBackendNotFoundError); + }); + + it('registers an ollama embedding entry under its logical name', () => { + bootstrapModels({ + models: { + embedding: { + fast: { backend: 'ollama', host: 'localhost:11434', model: 'nomic-embed-text' }, + }, + }, + }); + const backend = resolveEmbedding('fast'); + assert.strictEqual(backend.name, 'ollama'); + }); + + it('registers an ollama generative entry under its logical name', () => { + bootstrapModels({ + models: { + generative: { + default: { backend: 'ollama', host: 'localhost:11434', model: 'llama3.2' }, + }, + }, + }); + const backend = resolveGenerative('default'); + assert.strictEqual(backend.name, 'ollama'); + }); + + it('skips entries with unknown backend without throwing', () => { + bootstrapModels({ + models: { + embedding: { + default: { backend: 'magic-backend', model: 'm' }, + }, + generative: { + default: { backend: 'ollama', model: 'm' }, + }, + }, + }); + // The ollama entry on generative still registered. + assert.strictEqual(resolveGenerative('default').name, 'ollama'); + // The unknown-backend embedding entry was skipped, not registered. 
+ assert.throws(() => resolveEmbedding('default'), ModelBackendNotFoundError); + }); + + it('skips entries that are not objects', () => { + bootstrapModels({ + models: { + embedding: { + bad: 'just a string', + good: { backend: 'ollama', model: 'm' }, + }, + }, + }); + assert.strictEqual(resolveEmbedding('good').name, 'ollama'); + assert.throws(() => resolveEmbedding('bad'), ModelBackendNotFoundError); + }); + + it('skips entries missing a backend field', () => { + bootstrapModels({ models: { embedding: { x: { model: 'm' } } } }); + assert.throws(() => resolveEmbedding('x'), ModelBackendNotFoundError); + }); + + it('registers multiple logical names independently', () => { + bootstrapModels({ + models: { + generative: { + default: { backend: 'ollama', host: 'a:1', model: 'mA' }, + fast: { backend: 'ollama', host: 'b:2', model: 'mB' }, + }, + }, + }); + assert.strictEqual(resolveGenerative('default').name, 'ollama'); + assert.strictEqual(resolveGenerative('fast').name, 'ollama'); + }); +}); diff --git a/unitTests/validation/configValidator.test.js b/unitTests/validation/configValidator.test.js index ead5294c9..684e16d2a 100644 --- a/unitTests/validation/configValidator.test.js +++ b/unitTests/validation/configValidator.test.js @@ -385,4 +385,107 @@ describe('Test configValidator module', () => { "Invalid logging.rotation.interval value. Value should be a number followed by unit e.g. '10D'" ); }); + + // #629 (Phase 2 of #510): models config block. 
+ describe('models config', () => { + function baseConfig() { + return testUtils.deepClone(FAKE_CONFIG); + } + + it('validates clean when the models block is absent', () => { + const result = configValidator(baseConfig(), true); + expect(result.error).to.be.undefined; + expect(result.value.models).to.be.undefined; + }); + + it('accepts an empty models block', () => { + const config = baseConfig(); + config.models = {}; + const result = configValidator(config, true); + expect(result.error).to.be.undefined; + }); + + it('accepts an ollama embedding entry with host + model', () => { + const config = baseConfig(); + config.models = { + embedding: { + default: { backend: 'ollama', host: 'localhost:11434', model: 'nomic-embed-text' }, + }, + }; + const result = configValidator(config, true); + expect(result.error).to.be.undefined; + }); + + it('accepts a generative entry with requestTimeoutMs', () => { + const config = baseConfig(); + config.models = { + generative: { + fast: { backend: 'ollama', model: 'llama3.2', requestTimeoutMs: 30000 }, + }, + }; + const result = configValidator(config, true); + expect(result.error).to.be.undefined; + }); + + it('rejects entries missing a backend discriminator', () => { + const config = baseConfig(); + config.models = { embedding: { default: { model: 'm' } } }; + const result = configValidator(config, true); + expect(result.error).to.not.be.undefined; + expect(result.error.message).to.include('backend'); + }); + + it('rejects a non-numeric requestTimeoutMs', () => { + const config = baseConfig(); + config.models = { + generative: { default: { backend: 'ollama', model: 'm', requestTimeoutMs: 'soon' } }, + }; + const result = configValidator(config, true); + expect(result.error).to.not.be.undefined; + }); + + it('rejects a negative requestTimeoutMs', () => { + const config = baseConfig(); + config.models = { + generative: { default: { backend: 'ollama', model: 'm', requestTimeoutMs: -1 } }, + }; + const result = 
configValidator(config, true); + expect(result.error).to.not.be.undefined; + }); + + it('rejects requestTimeoutMs: 0 (omit the field for "no timeout")', () => { + const config = baseConfig(); + config.models = { + generative: { default: { backend: 'ollama', model: 'm', requestTimeoutMs: 0 } }, + }; + const result = configValidator(config, true); + expect(result.error).to.not.be.undefined; + }); + + it('rejects unknown fields inside a model entry (typo guard)', () => { + const config = baseConfig(); + config.models = { + generative: { default: { backend: 'ollama', model: 'm', bakend: 'oops' } }, + }; + const result = configValidator(config, true); + expect(result.error).to.not.be.undefined; + expect(result.error.message).to.include('bakend'); + }); + + it('accepts multiple logical names per kind', () => { + const config = baseConfig(); + config.models = { + embedding: { + default: { backend: 'ollama', model: 'm1' }, + high_quality: { backend: 'ollama', model: 'm2' }, + }, + generative: { + default: { backend: 'ollama', model: 'g1' }, + fast: { backend: 'ollama', model: 'g2' }, + }, + }; + const result = configValidator(config, true); + expect(result.error).to.be.undefined; + }); + }); }); From 1381bced97364f3cc9a3fa2ac0d224417a82e401 Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Wed, 20 May 2026 15:12:27 -0700 Subject: [PATCH 5/6] fix(models): CI green for Phase 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Run prettier on the four ollama-related files (CI Format Check missed these locally before push). - Defer the integration test's `OllamaBackend` import to a dynamic `await import(...)` inside `before()`. Statically importing from `components/ollama/` triggers a pre-existing CJS require cycle (`utility/common_utils.ts` ↔ `utility/logging/harper_logger.ts`) when loaded by `node --test`, which is fatal on Node 22+ (`ERR_REQUIRE_CYCLE_MODULE`). 
Other integration tests don't hit it because they only import from `@harperfast/integration-testing` and spawn Harper as a subprocess. Tracking: #629, #510 Co-Authored-By: Claude Opus 4.7 (1M context) --- components/ollama/index.ts | 16 +++--- .../server/ollama-backend.test.ts | 54 +++++++++++++------ resources/models/bootstrap.ts | 8 +-- unitTests/components/ollama/index.test.js | 30 +++-------- 4 files changed, 56 insertions(+), 52 deletions(-) diff --git a/components/ollama/index.ts b/components/ollama/index.ts index 723f578b3..aa24935b1 100644 --- a/components/ollama/index.ts +++ b/components/ollama/index.ts @@ -227,11 +227,7 @@ function buildOptionsBag(opts: BackendOpts): Record 0) out.options = options; if (opts.responseFormat === 'json') { out.format = 'json'; - } else if ( - opts.responseFormat && - typeof opts.responseFormat === 'object' && - 'schema' in opts.responseFormat - ) { + } else if (opts.responseFormat && typeof opts.responseFormat === 'object' && 'schema' in opts.responseFormat) { out.format = opts.responseFormat.schema; } return out; @@ -271,9 +267,7 @@ async function* readNdjson(body: ReadableStream): AsyncGenerator) { buf += decoder.decode(chunk, { stream: true }); if (buf.length > MAX_NDJSON_LINE_BYTES) { - throw new OllamaBackendError( - `Ollama NDJSON line exceeds ${MAX_NDJSON_LINE_BYTES} bytes without a newline` - ); + throw new OllamaBackendError(`Ollama NDJSON line exceeds ${MAX_NDJSON_LINE_BYTES} bytes without a newline`); } let nl: number; while ((nl = buf.indexOf('\n')) >= 0) { @@ -318,7 +312,11 @@ async function parseJsonResponse(res: Response, endpoint: string): Promise * any of which would poison `SUM(prompt_tokens)`-style aggregates over * `hdb_model_calls`. 
*/ -function assignFiniteTokenCount(usage: TokenUsage, key: 'promptTokens' | 'completionTokens' | 'embeddingTokens', value: unknown): void { +function assignFiniteTokenCount( + usage: TokenUsage, + key: 'promptTokens' | 'completionTokens' | 'embeddingTokens', + value: unknown +): void { if (typeof value !== 'number') return; if (!Number.isFinite(value) || value < 0 || !Number.isInteger(value)) return; usage[key] = value; diff --git a/integrationTests/server/ollama-backend.test.ts b/integrationTests/server/ollama-backend.test.ts index d753417ed..69100d340 100644 --- a/integrationTests/server/ollama-backend.test.ts +++ b/integrationTests/server/ollama-backend.test.ts @@ -22,7 +22,26 @@ import { suite, test, before } from 'node:test'; import { strictEqual, ok } from 'node:assert/strict'; -import { OllamaBackend } from '../../components/ollama/index.ts'; +// NOTE: `OllamaBackend` is imported dynamically inside `before()` rather than +// at the top of the file. Statically importing it from `components/ollama/` +// triggers a pre-existing require cycle in Harper's CommonJS graph +// (`utility/common_utils.ts` ↔ `utility/logging/harper_logger.ts`) when this +// test file is loaded by `node --test`, which is fatal on Node 22+ (ERR_REQUIRE_CYCLE_MODULE). +// Other integration tests don't hit it because they only import the +// `@harperfast/integration-testing` package and spawn Harper as a subprocess. +// Deferring the import past the static graph build sidesteps the cycle. 
+ +type OllamaBackendCtor = new ( + config: { host?: string; model?: string; requestTimeoutMs?: number }, + fetchImpl?: typeof fetch +) => { + embed: (input: string | string[], opts: object) => Promise<{ status: string; output: Float32Array[] }>; + generate: ( + input: unknown, + opts: object + ) => Promise<{ status: string; output: { content: string; finishReason: string } }>; + generateStream: (input: unknown, opts: object) => AsyncIterable<{ deltaContent?: string; finishReason?: string }>; +}; const OLLAMA_HOST = process.env.OLLAMA_HOST ?? 'http://localhost:11434'; const EMBED_MODEL = process.env.OLLAMA_EMBED_MODEL ?? 'nomic-embed-text'; @@ -47,10 +66,11 @@ async function reachable(): Promise<boolean> { const skip = !(await reachable()); suite('OllamaBackend against a real Ollama instance', { skip }, () => { - let backend: OllamaBackend; + let backend: InstanceType<OllamaBackendCtor>; - before(() => { - backend = new OllamaBackend({ host: OLLAMA_HOST.replace(/^https?:\/\//, '') }); + before(async () => { + const mod = (await import('../../components/ollama/index.ts')) as { OllamaBackend: OllamaBackendCtor }; + backend = new mod.OllamaBackend({ host: OLLAMA_HOST.replace(/^https?:\/\//, '') }); }); test('embed returns a non-empty Float32Array vector', async () => { @@ -87,10 +107,12 @@ suite('OllamaBackend against a real Ollama instance', { skip }, () => { }); test('generate via chat shape (messages array) produces non-empty content', async () => { - const result = await backend.generate( - [{ role: 'user', content: 'Reply with the single word OK.' }], - { accounting: ACCOUNTING, model: GENERATE_MODEL, maxTokens: 10, temperature: 0 } - ); + const result = await backend.generate([{ role: 'user', content: 'Reply with the single word OK.' 
}], { + accounting: ACCOUNTING, + model: GENERATE_MODEL, + maxTokens: 10, + temperature: 0, + }); strictEqual(result.status, 'completed'); ok(typeof result.output.content === 'string' && result.output.content.length > 0); }); @@ -114,13 +136,15 @@ suite('OllamaBackend against a real Ollama instance', { skip }, () => { test('AbortSignal cancels an in-flight stream', async () => { const ctrl = new AbortController(); - const iter = backend.generateStream('Write a long paragraph about the ocean.', { - accounting: ACCOUNTING, - model: GENERATE_MODEL, - signal: ctrl.signal, - maxTokens: 1000, - temperature: 0.5, - })[Symbol.asyncIterator](); + const iter = backend + .generateStream('Write a long paragraph about the ocean.', { + accounting: ACCOUNTING, + model: GENERATE_MODEL, + signal: ctrl.signal, + maxTokens: 1000, + temperature: 0.5, + }) + [Symbol.asyncIterator](); // Get one chunk to confirm the stream started, then abort. await iter.next(); ctrl.abort(); diff --git a/resources/models/bootstrap.ts b/resources/models/bootstrap.ts index d223bfa29..1cb4c6b04 100644 --- a/resources/models/bootstrap.ts +++ b/resources/models/bootstrap.ts @@ -69,17 +69,13 @@ function registerKind(kind: ModelKind, entries: Record | und // typo guards (`.unknown(false)` on modelEntrySchema) catch field-name // typos before this point; reaching here means `backend:` itself names // a type Harper doesn't ship a factory for in this version. - harperLogger.error( - `models.${kind}.${logicalName}: unknown backend '${entry.backend ?? '(missing)'}'; skipping` - ); + harperLogger.error(`models.${kind}.${logicalName}: unknown backend '${entry.backend ?? '(missing)'}'; skipping`); continue; } try { factory({ logicalName, kind, config: entry }); } catch (err) { - harperLogger.error( - `models.${kind}.${logicalName}: registration failed (${(err as Error)?.message ?? err})` - ); + harperLogger.error(`models.${kind}.${logicalName}: registration failed (${(err as Error)?.message ?? 
err})`); } } } diff --git a/unitTests/components/ollama/index.test.js b/unitTests/components/ollama/index.test.js index 08dc2815f..adebed83f 100644 --- a/unitTests/components/ollama/index.test.js +++ b/unitTests/components/ollama/index.test.js @@ -81,9 +81,7 @@ describe('OllamaBackend', () => { describe('embed', () => { it('POSTs to /api/embed with the configured model and Float32Array output', async () => { - const fetch = mockFetch(() => - jsonResponse({ embeddings: [[0.1, 0.2, 0.3]], prompt_eval_count: 3 }) - ); + const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1, 0.2, 0.3]], prompt_eval_count: 3 })); const b = new OllamaBackend({ model: 'nomic-embed-text' }, fetch); const result = await b.embed('hello', { accounting: ACCOUNTING }); assert.strictEqual(result.status, 'completed'); @@ -160,10 +158,7 @@ describe('OllamaBackend', () => { it('raises OllamaBackendError when response vector count differs from input count', async () => { const fetch = mockFetch(() => jsonResponse({ embeddings: [[0.1]] })); const b = new OllamaBackend({ model: 'm' }, fetch); - await assert.rejects( - () => b.embed(['a', 'b'], { accounting: ACCOUNTING }), - /returned 1 vectors for 2 inputs/ - ); + await assert.rejects(() => b.embed(['a', 'b'], { accounting: ACCOUNTING }), /returned 1 vectors for 2 inputs/); }); it('raises OllamaBackendError when a vector contains non-finite values', async () => { @@ -231,10 +226,8 @@ describe('OllamaBackend', () => { assert.strictEqual(result.output.content, 'reply'); }); - it("prepends system as the first message when supplied via { messages, system }", async () => { - const fetch = mockFetch(() => - jsonResponse({ message: { role: 'assistant', content: '' }, done: true }) - ); + it('prepends system as the first message when supplied via { messages, system }', async () => { + const fetch = mockFetch(() => jsonResponse({ message: { role: 'assistant', content: '' }, done: true })); const b = new OllamaBackend({ model: 'llama3.2' }, fetch); 
await b.generate( { messages: [{ role: 'user', content: 'q' }], system: 'be helpful' }, @@ -271,18 +264,14 @@ describe('OllamaBackend', () => { }); it("maps done_reason='length' to finishReason='length'", async () => { - const fetch = mockFetch(() => - jsonResponse({ response: 'cut', done: true, done_reason: 'length' }) - ); + const fetch = mockFetch(() => jsonResponse({ response: 'cut', done: true, done_reason: 'length' })); const b = new OllamaBackend({ model: 'm' }, fetch); const result = await b.generate('x', { accounting: ACCOUNTING }); assert.strictEqual(result.output.finishReason, 'length'); }); it('rejects a non-string content from /api/chat', async () => { - const fetch = mockFetch(() => - jsonResponse({ message: { role: 'assistant', content: 42 }, done: true }) - ); + const fetch = mockFetch(() => jsonResponse({ message: { role: 'assistant', content: 42 }, done: true })); const b = new OllamaBackend({ model: 'm' }, fetch); await assert.rejects( () => b.generate([{ role: 'user', content: 'q' }], { accounting: ACCOUNTING }), @@ -293,10 +282,7 @@ describe('OllamaBackend', () => { it('rejects a non-string response from /api/generate', async () => { const fetch = mockFetch(() => jsonResponse({ response: { nested: 'obj' }, done: true })); const b = new OllamaBackend({ model: 'm' }, fetch); - await assert.rejects( - () => b.generate('x', { accounting: ACCOUNTING }), - /response content is not a string/ - ); + await assert.rejects(() => b.generate('x', { accounting: ACCOUNTING }), /response content is not a string/); }); it('drops non-integer token counts from usage', async () => { @@ -402,7 +388,7 @@ describe('OllamaBackend', () => { it('throws OllamaBackendError when a stream line exceeds the byte cap', async () => { // Emit > 1 MiB of bytes with no newline. 
- const huge = 'x'.repeat(1 << 20 + 1); + const huge = 'x'.repeat(1 << (20 + 1)); const body = new ReadableStream({ start(controller) { controller.enqueue(new TextEncoder().encode(huge)); From ddb8c19f60575369d8c888a26ca40a7e62d3e191 Mon Sep 17 00:00:00 2001 From: Nathan Heskew Date: Wed, 20 May 2026 15:28:48 -0700 Subject: [PATCH 6/6] fix(models): real abort assertion + correct oversize-line byte count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two findings from claude-bot's inline PR review on #651: - Integration `'AbortSignal cancels an in-flight stream'`: replace `ok(rejected || true)` (a tautology — asserts nothing) with a real termination check. Race the iterator drain against a 5 s deadline so the actual failure mode being guarded (hung stream after abort) fails the test instead of timing the suite out. - Unit `'throws OllamaBackendError when a stream line exceeds the byte cap'`: `1 << 20 + 1` evaluated to `1 << 21` (2 MiB) due to operator precedence — `+` binds tighter than `<<`. Prettier's autofix parenthesized the wrong side (`1 << (20 + 1)`, same value). Re-parenthesize to `(1 << 20) + 1` (1 MiB + 1 byte) — exactly one byte past the cap, the comment now matches the allocation, and the test memory footprint halves. Tracking: #629, #510 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../server/ollama-backend.test.ts | 35 ++++++++++++------- unitTests/components/ollama/index.test.js | 7 ++-- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/integrationTests/server/ollama-backend.test.ts b/integrationTests/server/ollama-backend.test.ts index 69100d340..eb0ee9fc1 100644 --- a/integrationTests/server/ollama-backend.test.ts +++ b/integrationTests/server/ollama-backend.test.ts @@ -148,18 +148,29 @@ suite('OllamaBackend against a real Ollama instance', { skip }, () => { // Get one chunk to confirm the stream started, then abort. 
await iter.next(); ctrl.abort(); - // Subsequent reads should reject (AbortError) — accept either rejection - // or premature done since fetch may swallow either path. - let rejected = false; - try { - while (true) { - const next = await iter.next(); - if (next.done) break; + // After abort, the iterator must terminate — either by rejecting + // (AbortError / abort-flavored error) or by reaching `done`. The + // real failure mode this guards against is the stream hanging, + // where neither happens. Race a 5 s deadline so a hang fails the + // test instead of timing the suite out. + const drain = (async () => { + try { + while (true) { + const next = await iter.next(); + if (next.done) return 'done' as const; + } + } catch (err) { + const name = (err as Error).name; + const isAbort = name === 'AbortError' || /abort/i.test(String(err)); + return isAbort ? ('aborted' as const) : ('errored' as const); } - } catch (err) { - rejected = (err as Error).name === 'AbortError' || /abort/i.test(String(err)); - } - // Either an abort error fired, or the iterator terminated quickly post-abort. - ok(rejected || true); + })(); + const HANG = Symbol('hang'); + const deadline = new Promise((resolve) => setTimeout(() => resolve(HANG), 5000)); + const outcome = await Promise.race([drain, deadline]); + ok( + outcome === 'done' || outcome === 'aborted', + `expected abort to terminate stream (done or AbortError); got ${String(outcome)}` + ); }); }); diff --git a/unitTests/components/ollama/index.test.js b/unitTests/components/ollama/index.test.js index adebed83f..da0a2f71e 100644 --- a/unitTests/components/ollama/index.test.js +++ b/unitTests/components/ollama/index.test.js @@ -387,8 +387,11 @@ describe('OllamaBackend', () => { }); it('throws OllamaBackendError when a stream line exceeds the byte cap', async () => { - // Emit > 1 MiB of bytes with no newline. - const huge = 'x'.repeat(1 << (20 + 1)); + // Emit just over 1 MiB of bytes with no newline. 
The parens matter: + // `+` binds tighter than `<<`, so the original `1 << 20 + 1` (and + // prettier's autofix `1 << (20 + 1)`) both evaluate to `1 << 21` + // (2 MiB). We want `(1 << 20) + 1` — exactly one byte past the cap. + const huge = 'x'.repeat((1 << 20) + 1); const body = new ReadableStream({ start(controller) { controller.enqueue(new TextEncoder().encode(huge));