diff --git a/CHANGELOG.md b/CHANGELOG.md index 58e9c49..67914b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,20 @@ All notable changes to the TypeScript package will be documented in this file. +## [0.28.1] - 2026-06-10 + +### Fixed + +- **A retrieve call with `semantic`/`rerank` no longer kills the MCP server when the optional `@huggingface/transformers` package is missing**: the rejection previously escaped the stdio serve loop unhandled, terminating the process mid-call so agents saw an infinite spinner instead of an error. Retrieve failures now return an MCP `isError` tool result the agent can read and react to, and the serve loop is hardened so no handler rejection can tear down the server. +- **A project-local `npm install @huggingface/transformers` now actually enables semantic/rerank**: npx-launched and globally installed servers resolve the optional package from the project root (derived from the graph path) in addition to madar's own installation, and the install hint in the error message now points at instructions that work for those installs. +- **Failed semantic model loads no longer poison the pipeline cache**: a rejected load is evicted, so installing the package and retrying succeeds without restarting the server. + +### Added + +- **Semantic/rerank capability gating in the retrieve tool schema**: `tools/list` omits the `semantic`, `semantic_model`, `rerank`, and `rerank_model` fields when the optional package is not resolvable, so agents never request a capability the machine lacks. +- **`madar doctor` now reports semantic/rerank availability** with the exact enable command, without affecting overall health status. +- **Semantic model loads are bounded by a timeout** (`MADAR_MODEL_LOAD_TIMEOUT_MS`, in milliseconds; default `120000`, i.e. 120s) so a stalled first-use model download cannot block the serial stdio request loop indefinitely. 
+ ## [0.28.0] - 2026-06-10 ### Added diff --git a/README.md b/README.md index f95b51f..82e45b3 100644 --- a/README.md +++ b/README.md @@ -200,11 +200,13 @@ It does not record prompt text, answer text, source paths, source content, or re ## What's New -Current version: `0.28.0`. +Current version: `0.28.1`. -This release promotes the public benchmark work to a proof-backed stable release: six public TypeScript `explain-runtime` legacy rows now have checked-in `full_win` receipts, strict runtime-proof gates, direct-evidence answer checks, scoped benchmark roots, and share-safe reports. It also includes retrieval and extraction improvements for runtime handoffs, source-visible framework flows, and benchmark reproducibility. +`0.28.1` is a stability hotfix: a `retrieve` call with `semantic`/`rerank` no longer crashes the MCP server when the optional `@huggingface/transformers` package is missing, a project-local install of that package now enables semantic/rerank under npx-launched or global installs, and the retrieve tool schema only advertises semantic fields when the capability is actually available. -Read the full notes in the [0.28.0 changelog](https://github.com/mohanagy/madar/blob/main/CHANGELOG.md#0280---2026-06-10). +`0.28.0` promoted the public benchmark work to a proof-backed stable release: six public TypeScript `explain-runtime` legacy rows now have checked-in `full_win` receipts, strict runtime-proof gates, direct-evidence answer checks, scoped benchmark roots, and share-safe reports. It also included retrieval and extraction improvements for runtime handoffs, source-visible framework flows, and benchmark reproducibility. + +Read the full notes in the [0.28.1 changelog](https://github.com/mohanagy/madar/blob/main/CHANGELOG.md#0281---2026-06-10). 
## Docs diff --git a/docs/mcp-registry/server.json b/docs/mcp-registry/server.json index 51e2540..befa835 100644 --- a/docs/mcp-registry/server.json +++ b/docs/mcp-registry/server.json @@ -9,13 +9,13 @@ "source": "github", "url": "https://github.com/mohanagy/madar" }, - "version": "0.28.0", + "version": "0.28.1", "packages": [ { "registryType": "npm", "registryBaseUrl": "https://registry.npmjs.org", "identifier": "@lubab/madar", - "version": "0.28.0", + "version": "0.28.1", "runtimeHint": "npx", "transport": { "type": "stdio" diff --git a/package-lock.json b/package-lock.json index f152451..68c946a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@lubab/madar", - "version": "0.28.0", + "version": "0.28.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@lubab/madar", - "version": "0.28.0", + "version": "0.28.1", "license": "MIT", "dependencies": { "@vscode/tree-sitter-wasm": "^0.3.1", diff --git a/package.json b/package.json index 864e689..2a5e8b0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lubab/madar", - "version": "0.28.0", + "version": "0.28.1", "description": "Stop AI coding agents from rediscovering large TypeScript/Node repos. 
Madar compiles task-aware local context packs from what runs for this task.", "license": "MIT", "author": "mohanagy", diff --git a/src/infrastructure/doctor.ts b/src/infrastructure/doctor.ts index fc28e98..0836be2 100644 --- a/src/infrastructure/doctor.ts +++ b/src/infrastructure/doctor.ts @@ -9,6 +9,7 @@ import { resolveOpencodeConfigPath, } from './install.js' import { analyzeGraphContextFreshness, graphFreshnessStatusLabel, type GraphContextFreshnessStatus } from '../runtime/freshness.js' +import { isSemanticRuntimeAvailable } from '../runtime/semantic.js' import { findPackageRoot, readPackageVersion } from '../shared/package-metadata.js' const MADAR_SECTION_MARKER = '## madar' @@ -47,10 +48,18 @@ export interface DoctorReport { graph: GraphCheck agents: AgentCheck[] mcpChecks: McpCheck[] + /** Availability of the optional semantic/rerank runtime. Informational + * only — never part of the `healthy` computation. */ + semantic: SemanticCheck nextCommands: string[] healthy: boolean } +interface SemanticCheck { + available: boolean + detail: string +} + interface JsonObject { [key: string]: unknown } @@ -526,11 +535,17 @@ export function buildDoctorReport(options: DoctorCommandOptions = {}): DoctorRep const mcpChecks = [claudeMcp, cursorMcp, copilotMcp] + const semanticAvailable = isSemanticRuntimeAvailable(projectDir) + const semantic: SemanticCheck = semanticAvailable + ? 
{ available: true, detail: 'optional @huggingface/transformers resolved' } + : { available: false, detail: 'optional — run `npm install @huggingface/transformers` in this project to enable semantic/rerank' } + const partialReport = { packageVersion, graph, agents, mcpChecks, + semantic, } const nextCommands = computeNextCommands(partialReport) const healthy = graph.exists && graph.freshness === 'fresh' && agents.every((agent) => agent.status === 'configured') && mcpChecks.every((check) => check.status === 'ok') @@ -578,6 +593,7 @@ export function runDoctorCommand(options: DoctorCommandOptions = {}): string { for (const check of report.mcpChecks) { lines.push(` - ${check.label}: ${check.status} (${check.configPath}; ${check.reason})`) } + lines.push(`- semantic/rerank: ${report.semantic.available ? 'available' : 'unavailable'} (${report.semantic.detail})`) if (report.nextCommands.length === 0) { lines.push('- next commands: none') diff --git a/src/runtime/retrieve.ts b/src/runtime/retrieve.ts index 1a2501c..846e542 100644 --- a/src/runtime/retrieve.ts +++ b/src/runtime/retrieve.ts @@ -111,6 +111,9 @@ export interface RetrieveOptions { semanticModel?: string rerank?: boolean rerankerModel?: string + /** Project root used to resolve the optional transformers package when the + * server itself runs from elsewhere (npx cache, global install). */ + projectRoot?: string /** #75 manual override for the retrieval gate. When set (0-5), the gate * bypasses heuristic classification and emits a decision with reason * 'manual override' at the supplied level. Caller-side surface for the @@ -6369,7 +6372,10 @@ export async function retrieveContextAsync(graph: KnowledgeGraph, options: Retri semanticScores = await rankCandidatesBySemanticSimilarity( options.question, [...candidatesById.values()].map((node) => ({ id: node.id, text: semanticTextForNode(node) })), - options.semanticModel ? { model: options.semanticModel } : {}, + { + ...(options.semanticModel ? 
{ model: options.semanticModel } : {}), + ...(options.projectRoot ? { projectRoot: options.projectRoot } : {}), + }, ) } @@ -6398,7 +6404,10 @@ export async function retrieveContextAsync(graph: KnowledgeGraph, options: Retri rerankScores = await rerankCandidatesWithCrossEncoder( options.question, candidatePool.map((node) => ({ id: node.id, text: semanticTextForNode(node) })), - options.rerankerModel ? { model: options.rerankerModel } : {}, + { + ...(options.rerankerModel ? { model: options.rerankerModel } : {}), + ...(options.projectRoot ? { projectRoot: options.projectRoot } : {}), + }, ) } diff --git a/src/runtime/semantic.ts b/src/runtime/semantic.ts index c7cee3f..03ea0f5 100644 --- a/src/runtime/semantic.ts +++ b/src/runtime/semantic.ts @@ -1,3 +1,8 @@ +import { existsSync, readFileSync } from 'node:fs' +import { createRequire } from 'node:module' +import { dirname, join, resolve } from 'node:path' +import { pathToFileURL } from 'node:url' + import { isRecord } from '../shared/guards.js' export interface SemanticCandidate { @@ -8,14 +13,24 @@ export interface SemanticCandidate { export interface SemanticRuntimeOptions { model?: string batchSize?: number + /** Project root to resolve the optional transformers package from, in + * addition to madar's own installation. Needed because npx-launched + * servers run from the npx cache, where a project-local + * `npm install @huggingface/transformers` is otherwise invisible. 
*/ + projectRoot?: string } export const DEFAULT_SEMANTIC_MODEL = 'Xenova/all-MiniLM-L6-v2' export const DEFAULT_RERANK_MODEL = 'Xenova/ms-marco-MiniLM-L-6-v2' const OPTIONAL_TRANSFORMERS_PACKAGE = '@huggingface/transformers' +const DEFAULT_MODEL_LOAD_TIMEOUT_MS = 120_000 type TransformerPipeline = (input: unknown, options?: Record) => Promise +interface TransformersModule { + pipeline: (task: string, model: string) => Promise +} + const pipelineCache = new Map>() function numericArrayFromValue(value: unknown): number[] | null { @@ -97,42 +112,178 @@ function cosineSimilarity(left: readonly number[], right: readonly number[]): nu return dot / (Math.sqrt(leftMagnitude) * Math.sqrt(rightMagnitude)) } -async function loadPipeline(task: string, model: string): Promise { - const cacheKey = `${task}\u0000${model}` +async function loadPipeline(task: string, model: string, projectRoot?: string): Promise { + const resolvedRoot = resolve(projectRoot ?? process.cwd()) + const cacheKey = `${task}\u0000${model}\u0000${resolvedRoot}` const cached = pipelineCache.get(cacheKey) if (cached) { return cached } - const pending = (async () => { + const pending = withLoadTimeout((async () => { try { - const transformersModule = await import(OPTIONAL_TRANSFORMERS_PACKAGE) as { - pipeline: (task: string, model: string) => Promise - } + const transformersModule = await importTransformersModule(resolvedRoot) return await transformersModule.pipeline(task, model) } catch (error) { const message = error instanceof Error ? error.message : String(error) + if (message.startsWith('[madar]')) { + throw error + } if (isMissingOptionalTransformersDependency(message)) { - throw new Error( - `[madar] Semantic retrieval requires the optional package '${OPTIONAL_TRANSFORMERS_PACKAGE}'. 
Install it with \`npm install ${OPTIONAL_TRANSFORMERS_PACKAGE}\` and rerun with --semantic or --rerank.`, - ) + throw missingTransformersError() } throw new Error(`[madar] Failed to load local ${task} model '${model}': ${message}`) } - })() + })(), task, model) pipelineCache.set(cacheKey, pending) + pending.catch(() => { + if (pipelineCache.get(cacheKey) === pending) { + pipelineCache.delete(cacheKey) + } + }) return pending } +function missingTransformersError(): Error { + return new Error( + `[madar] Semantic retrieval requires the optional package '${OPTIONAL_TRANSFORMERS_PACKAGE}'. Run \`npm install ${OPTIONAL_TRANSFORMERS_PACKAGE}\` in your project root (madar resolves it from the project as well as its own installation), then retry with --semantic or --rerank.`, + ) +} + +function findProjectTransformersDir(startDir: string): string | null { + let current = resolve(startDir) + for (;;) { + const candidate = join(current, 'node_modules', '@huggingface', 'transformers') + if (existsSync(join(candidate, 'package.json'))) { + return candidate + } + const parent = dirname(current) + if (parent === current) { + return null + } + current = parent + } +} + +function entryFromExportValue(value: unknown): string | null { + if (typeof value === 'string') { + return value + } + if (isRecord(value)) { + for (const condition of ['import', 'node', 'default']) { + const nested = entryFromExportValue(value[condition]) + if (nested) { + return nested + } + } + } + return null +} + +function moduleEntryFromManifest(manifest: Record): string { + const exportsField = manifest.exports + const rootExport = isRecord(exportsField) && Object.keys(exportsField).some((key) => key.startsWith('.')) + ? 
exportsField['.'] + : exportsField + const fromExports = entryFromExportValue(rootExport) + if (fromExports) { + return fromExports + } + if (typeof manifest.module === 'string') { + return manifest.module + } + if (typeof manifest.main === 'string') { + return manifest.main + } + return 'index.js' +} + +async function importProjectTransformers(projectRoot: string): Promise { + const packageDir = findProjectTransformersDir(projectRoot) + if (!packageDir) { + return null + } + + const manifest: unknown = JSON.parse(readFileSync(join(packageDir, 'package.json'), 'utf8')) + if (!isRecord(manifest)) { + return null + } + const entryPath = join(packageDir, moduleEntryFromManifest(manifest)) + return await import(pathToFileURL(entryPath).href) as TransformersModule +} + +async function importTransformersModule(projectRoot: string): Promise { + try { + return await import(OPTIONAL_TRANSFORMERS_PACKAGE) as TransformersModule + } catch (error) { + const message = error instanceof Error ? error.message : String(error) + if (!isMissingOptionalTransformersDependency(message)) { + throw error + } + const projectModule = await importProjectTransformers(projectRoot) + if (projectModule) { + return projectModule + } + throw missingTransformersError() + } +} + +function modelLoadTimeoutMs(): number { + const raw = Number.parseInt(process.env.MADAR_MODEL_LOAD_TIMEOUT_MS ?? '', 10) + return Number.isFinite(raw) && raw > 0 ? raw : DEFAULT_MODEL_LOAD_TIMEOUT_MS +} + +async function withLoadTimeout(work: Promise, task: string, model: string): Promise { + const timeoutMs = modelLoadTimeoutMs() + let timer: NodeJS.Timeout | undefined + const timeout = new Promise((_, reject) => { + timer = setTimeout(() => { + reject(new Error( + `[madar] Timed out loading ${task} model '${model}' after ${Math.round(timeoutMs / 1000)}s. 
Override with MADAR_MODEL_LOAD_TIMEOUT_MS or retry without --semantic/--rerank.`, + )) + }, timeoutMs) + timer.unref?.() + }) + try { + return await Promise.race([work, timeout]) + } finally { + clearTimeout(timer) + } +} + function isMissingOptionalTransformersDependency(message: string): boolean { + if (!message.includes(OPTIONAL_TRANSFORMERS_PACKAGE)) { + return false + } return ( message.includes(`Cannot find package '${OPTIONAL_TRANSFORMERS_PACKAGE}'`) || message.includes(`Cannot find module '${OPTIONAL_TRANSFORMERS_PACKAGE}'`) || - (message.includes('ERR_MODULE_NOT_FOUND') && message.includes(OPTIONAL_TRANSFORMERS_PACKAGE)) + message.includes('ERR_MODULE_NOT_FOUND') || + // Bundler-flavoured resolution failures (e.g. vite/vitest dev transforms). + /could not resolve|failed to resolve|failed to load/i.test(message) ) } +/** True when the optional transformers package is resolvable, either from + * madar's own installation or from the given project root. Used to gate the + * semantic/rerank tool-schema fields and the doctor report. */ +export function isSemanticRuntimeAvailable(projectRoot?: string): boolean { + try { + const require = createRequire(import.meta.url) + require.resolve(`${OPTIONAL_TRANSFORMERS_PACKAGE}/package.json`) + return true + } catch (error) { + const code = isRecord(error) && typeof (error as { code?: unknown }).code === 'string' ? (error as { code: string }).code : '' + // PATH_NOT_EXPORTED still proves the package is installed; only a true + // module-not-found means it is absent from madar's own tree. + if (code === 'ERR_PACKAGE_PATH_NOT_EXPORTED') { + return true + } + } + return findProjectTransformersDir(projectRoot ?? 
process.cwd()) !== null +} + function classificationScore(output: unknown): number { if (isRecord(output) && typeof output.score === 'number' && Number.isFinite(output.score)) { return output.score @@ -170,7 +321,7 @@ export async function rankCandidatesBySemanticSimilarity( return new Map() } - const embedder = await loadPipeline('feature-extraction', options.model ?? DEFAULT_SEMANTIC_MODEL) + const embedder = await loadPipeline('feature-extraction', options.model ?? DEFAULT_SEMANTIC_MODEL, options.projectRoot) const questionVector = vectorFromOutput(await embedder(question, { pooling: 'mean', normalize: true })) const batchSize = options.batchSize ?? 32 const scores = new Map() @@ -199,7 +350,7 @@ export async function rerankCandidatesWithCrossEncoder( return new Map() } - const reranker = await loadPipeline('text-classification', options.model ?? DEFAULT_RERANK_MODEL) + const reranker = await loadPipeline('text-classification', options.model ?? DEFAULT_RERANK_MODEL, options.projectRoot) const outputs = await reranker( candidates.map((candidate) => ({ text: question, diff --git a/src/runtime/stdio-server.ts b/src/runtime/stdio-server.ts index f600ea5..0bbfd3c 100644 --- a/src/runtime/stdio-server.ts +++ b/src/runtime/stdio-server.ts @@ -18,6 +18,7 @@ import { type ResourceSessionState, } from './stdio/resources.js' import { handleToolCall as handleToolCallRequest } from './stdio/tools.js' +import { isSemanticRuntimeAvailable } from './semantic.js' import { communitiesFromGraph, getCommunity, @@ -447,6 +448,13 @@ function textToolResult(text: string): { content: Array<{ type: 'text'; text: st } } +function errorToolResult(text: string): { content: Array<{ type: 'text'; text: string }>; isError: true } { + return { + content: [{ type: 'text', text }], + isError: true, + } +} + function loadGraphCached(graphPath: string): ReturnType { const safeGraphPath = validateGraphPath(graphPath) const currentGraphStat = statSync(safeGraphPath) @@ -628,7 +636,11 @@ export 
function handleStdioRequest( }) case 'tools/list': { const profile = resolveToolProfileFromEnv() - return ok(id, { tools: activeMcpTools(profile) }) + // Only advertise semantic/rerank params when the optional transformers + // package is actually resolvable on this machine — agents cannot pass + // parameters that are absent from the schema. + const semanticAvailable = isSemanticRuntimeAvailable(dirname(graphPath)) + return ok(id, { tools: activeMcpTools(profile, { semanticAvailable }) }) } case 'tools/call': { const profile = resolveToolProfileFromEnv() @@ -644,6 +656,7 @@ export function handleStdioRequest( ok, failure, textToolResult, + errorToolResult, stringParam, stringParamAlias, numberParamAlias, @@ -838,8 +851,16 @@ export async function serveGraphStdio(options: ServeGraphStdioOptions): Promise< continue } - emitResourceNotifications(output, options.graphPath, sessionState) - const response = await Promise.resolve(handleStdioRequest(options.graphPath, payload, sessionState)) + let response: StdioResponse | null + try { + emitResourceNotifications(output, options.graphPath, sessionState) + response = await Promise.resolve(handleStdioRequest(options.graphPath, payload, sessionState)) + } catch (error) { + // A rejected handler must never tear down the whole stdio server: every + // request gets an answer and the loop keeps serving (#crash). + const message = error instanceof Error ? 
error.message : 'Request failed' + response = failure(requestId(payload as StdioRequest), JSONRPC_SERVER_ERROR, message) + } if (response) { if (response.error) { emitLogNotification(output, sessionState, 'error', { message: response.error.message, code: response.error.code }) diff --git a/src/runtime/stdio/definitions.ts b/src/runtime/stdio/definitions.ts index c29909c..d6e405f 100644 --- a/src/runtime/stdio/definitions.ts +++ b/src/runtime/stdio/definitions.ts @@ -408,12 +408,40 @@ export const CORE_TOOL_NAMES = ['retrieve', 'impact', 'call_chain', 'community_o export type McpCoreToolName = (typeof CORE_TOOL_NAMES)[number] -export function activeMcpTools(profile: McpToolProfile = 'core'): McpToolDefinition[] { - if (profile === 'full') { - return MCP_TOOLS +/** Retrieve params that only function when the optional + * @huggingface/transformers package is resolvable. */ +const SEMANTIC_RETRIEVE_FIELDS = ['semantic', 'semantic_model', 'rerank', 'rerank_model'] as const + +export interface ActiveMcpToolsOptions { + /** When false, semantic/rerank fields are stripped from the retrieve + * schema so agents never request a capability this machine lacks. */ + semanticAvailable?: boolean +} + +function withoutSemanticFields(tool: McpToolDefinition): McpToolDefinition { + if (tool.name !== 'retrieve') { + return tool + } + const hidden = new Set(SEMANTIC_RETRIEVE_FIELDS) + const properties = Object.fromEntries( + Object.entries(tool.inputSchema.properties).filter(([key]) => !hidden.has(key)), + ) + return { + ...tool, + inputSchema: { + ...tool.inputSchema, + properties, + }, } +} + +export function activeMcpTools(profile: McpToolProfile = 'core', options: ActiveMcpToolsOptions = {}): McpToolDefinition[] { const core = new Set(CORE_TOOL_NAMES) - return MCP_TOOLS.filter((tool) => core.has(tool.name)) + const tools = profile === 'full' ? 
MCP_TOOLS : MCP_TOOLS.filter((tool) => core.has(tool.name)) + if (options.semanticAvailable === false) { + return tools.map((tool) => withoutSemanticFields(tool)) + } + return tools } export function resolveToolProfileFromEnv(env: NodeJS.ProcessEnv = process.env): McpToolProfile { diff --git a/src/runtime/stdio/tools.ts b/src/runtime/stdio/tools.ts index 391a069..110ab62 100644 --- a/src/runtime/stdio/tools.ts +++ b/src/runtime/stdio/tools.ts @@ -96,6 +96,7 @@ interface ToolHelpers { ok(id: string | number | null, result: unknown): StdioResponse failure(id: string | number | null, code: number, message: string): StdioResponse textToolResult(text: string): { content: Array<{ type: 'text'; text: string }> } + errorToolResult(text: string): { content: Array<{ type: 'text'; text: string }>; isError: true } stringParam(params: unknown, key: string): string | null stringParamAlias(params: unknown, keys: readonly string[]): string | null numberParamAlias(params: unknown, keys: readonly string[], options?: { min?: number; max?: number }): number | null @@ -1342,6 +1343,7 @@ export function handleToolCall(id: string | number | null, graphPath: string, pa ...(retrieveRerankModel ? { rerankerModel: retrieveRerankModel } : {}), ...(retrieveLevelTyped !== null ? { retrievalLevel: retrieveLevelTyped } : {}), ...(effectiveRetrieveStrategy ? { retrievalStrategy: effectiveRetrieveStrategy } : {}), + projectRoot: dirname(resolve(graphPath)), }) : Promise.resolve(retrieveContext(graph, { question, budget: retrieveBudget, @@ -1379,6 +1381,12 @@ export function handleToolCall(id: string | number | null, graphPath: string, pa ...payload, evidence: evidenceForRetrievePayload(result, graphPath), }))) + }).catch((error: unknown) => { + // A rejected retrieve (e.g. missing optional semantic dependency) must + // surface as an MCP tool error the agent can read and react to — + // never as an unhandled rejection that kills the server (#crash). + const message = error instanceof Error ? 
error.message : 'retrieve failed' + return helpers.ok(id, helpers.errorToolResult(message)) }) } case 'context_pack': { diff --git a/tests/unit/stdio-semantic-resilience.test.ts b/tests/unit/stdio-semantic-resilience.test.ts new file mode 100644 index 0000000..2b52491 --- /dev/null +++ b/tests/unit/stdio-semantic-resilience.test.ts @@ -0,0 +1,313 @@ +import { mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { PassThrough } from 'node:stream' + +import { beforeEach, describe, expect, it, vi } from 'vitest' + +interface JsonRpcLine { + id?: string | number | null + method?: string + result?: { + isError?: boolean + content?: Array<{ type: string; text: string }> + tools?: Array<{ name: string; inputSchema: { properties: Record } }> + } + error?: { code: number; message: string } +} + +function writeGraphFixture(root: string): string { + const outDir = join(root, 'out') + mkdirSync(outDir, { recursive: true }) + const graphPath = join(outDir, 'graph.json') + writeFileSync( + graphPath, + JSON.stringify({ + nodes: [ + { + id: 'ledger_repo', + label: 'LedgerRepository', + source_file: 'ledger.ts', + source_location: 'L4-L6', + file_type: 'code', + community: 0, + snippet: 'class LedgerRepository {\n saveInvoiceHistory() {}\n}', + }, + { + id: 'logger', + label: 'Logger', + source_file: 'logger.ts', + source_location: 'L1-L3', + file_type: 'code', + community: 1, + snippet: 'class Logger {\n info() {}\n}', + }, + ], + edges: [], + hyperedges: [], + }), + 'utf8', + ) + return graphPath +} + +function installTransformersStub(root: string): void { + const packageDir = join(root, 'node_modules', '@huggingface', 'transformers') + mkdirSync(packageDir, { recursive: true }) + writeFileSync( + join(packageDir, 'package.json'), + JSON.stringify({ + name: '@huggingface/transformers', + version: '0.0.0-test', + type: 'module', + main: 'index.js', + }), + 'utf8', + ) + writeFileSync( + 
join(packageDir, 'index.js'), + [ + 'export async function pipeline(task, model) {', + " if (task === 'text-classification') {", + ' return async (input) => (Array.isArray(input) ? input : [input]).map((pair) => [{', + " label: 'RELEVANT',", + " score: typeof pair?.text_pair === 'string' && pair.text_pair.includes('Ledger') ? 0.9 : 0.2,", + ' }])', + ' }', + ' return async (input) => (Array.isArray(input) ? input : [input]).map(() => ({ data: [1, 0] }))', + '}', + '', + ].join('\n'), + 'utf8', + ) +} + +function makeTempRoot(): string { + return mkdtempSync(join(tmpdir(), 'madar-semantic-resilience-')) +} + +function retrieveCall(id: number, args: Record): Record { + return { + jsonrpc: '2.0', + id, + method: 'tools/call', + params: { name: 'retrieve', arguments: { question: 'ledger invoice history', budget: 2000, ...args } }, + } +} + +async function waitForResponse(lines: () => JsonRpcLine[], id: number, timeoutMs = 5000): Promise { + const start = Date.now() + for (;;) { + const found = lines().find((line) => line.id === id) + if (found) { + return found + } + if (Date.now() - start > timeoutMs) { + throw new Error(`Timed out waiting for response id=${id}`) + } + await new Promise((resolveSleep) => setTimeout(resolveSleep, 20)) + } +} + +describe('semantic optional dependency resilience', () => { + beforeEach(() => { + vi.resetModules() + vi.clearAllMocks() + }) + + it('retrieve without semantic works when @huggingface/transformers is not installed', async () => { + const root = makeTempRoot() + try { + const graphPath = writeGraphFixture(root) + const { handleStdioRequest } = await import('../../src/runtime/stdio-server.js') + + const response = (await Promise.resolve(handleStdioRequest(graphPath, retrieveCall(1, {})))) as JsonRpcLine + + expect(response?.error).toBeUndefined() + expect(response?.result?.isError).toBeUndefined() + expect(response?.result?.content?.[0]?.text).toContain('LedgerRepository') + } finally { + rmSync(root, { recursive: true, force: 
true }) + } + }) + + it('rerank without the package returns isError and the server keeps answering', async () => { + const root = makeTempRoot() + try { + const graphPath = writeGraphFixture(root) + const { serveGraphStdio } = await import('../../src/runtime/stdio-server.js') + + const input = new PassThrough() + const output = new PassThrough() + const errorOutput = new PassThrough() + errorOutput.resume() + const chunks: string[] = [] + output.on('data', (chunk) => chunks.push(String(chunk))) + const parsedLines = (): JsonRpcLine[] => + chunks + .join('') + .split('\n') + .filter((line) => line.trim().length > 0) + .map((line) => { + try { + return JSON.parse(line) as JsonRpcLine + } catch { + return {} + } + }) + + const serverDone = serveGraphStdio({ graphPath, input, output, errorOutput }) + + input.write(`${JSON.stringify(retrieveCall(1, { rerank: true }))}\n`) + const first = await waitForResponse(parsedLines, 1) + expect(first.error).toBeUndefined() + expect(first.result?.isError).toBe(true) + expect(first.result?.content?.[0]?.text).toContain('@huggingface/transformers') + + input.write(`${JSON.stringify(retrieveCall(2, {}))}\n`) + const second = await waitForResponse(parsedLines, 2) + expect(second.error).toBeUndefined() + expect(second.result?.isError).toBeUndefined() + expect(second.result?.content?.[0]?.text).toContain('LedgerRepository') + + input.end() + await serverDone + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it('resolves a project-local @huggingface/transformers install for rerank', async () => { + const root = makeTempRoot() + try { + const graphPath = writeGraphFixture(root) + installTransformersStub(root) + const { handleStdioRequest } = await import('../../src/runtime/stdio-server.js') + + const response = (await Promise.resolve(handleStdioRequest(graphPath, retrieveCall(1, { rerank: true })))) as JsonRpcLine + + expect(response?.error).toBeUndefined() + expect(response?.result?.isError).toBeUndefined() + 
expect(response?.result?.content?.[0]?.text).toContain('LedgerRepository') + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it('reranks via a project-local install passed as projectRoot', async () => { + const root = makeTempRoot() + try { + installTransformersStub(root) + const { rerankCandidatesWithCrossEncoder } = await import('../../src/runtime/semantic.js') + + const scores = await rerankCandidatesWithCrossEncoder( + 'where is invoice history stored', + [ + { id: 'ledger_repo', text: 'LedgerRepository persists invoice history' }, + { id: 'logger', text: 'Logger telemetry' }, + ], + { projectRoot: root, model: 'stub-model-project-local' }, + ) + + expect(scores.get('ledger_repo')).toBeGreaterThan(scores.get('logger') ?? 0) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it('does not poison pipelineCache after a failed load', async () => { + const root = makeTempRoot() + try { + const { rerankCandidatesWithCrossEncoder } = await import('../../src/runtime/semantic.js') + const candidates = [{ id: 'ledger_repo', text: 'LedgerRepository persists invoice history' }] + const options = { projectRoot: root, model: 'stub-model-eviction' } + + await expect(rerankCandidatesWithCrossEncoder('invoice history', candidates, options)) + .rejects.toThrow(/@huggingface\/transformers/) + + installTransformersStub(root) + + const scores = await rerankCandidatesWithCrossEncoder('invoice history', candidates, options) + expect(scores.get('ledger_repo')).toBeGreaterThan(0) + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) + + it('activeMcpTools omits semantic fields when unavailable and keeps them when available', async () => { + const { activeMcpTools } = await import('../../src/runtime/stdio/definitions.js') + + const findRetrieve = (tools: ReturnType) => + tools.find((tool) => tool.name === 'retrieve') + + const gated = findRetrieve(activeMcpTools('core', { semanticAvailable: false })) + 
expect(gated?.inputSchema.properties).not.toHaveProperty('semantic') + expect(gated?.inputSchema.properties).not.toHaveProperty('semantic_model') + expect(gated?.inputSchema.properties).not.toHaveProperty('rerank') + expect(gated?.inputSchema.properties).not.toHaveProperty('rerank_model') + expect(gated?.inputSchema.properties).toHaveProperty('question') + expect(gated?.inputSchema.properties).toHaveProperty('budget') + + const open = findRetrieve(activeMcpTools('core', { semanticAvailable: true })) + expect(open?.inputSchema.properties).toHaveProperty('semantic') + expect(open?.inputSchema.properties).toHaveProperty('rerank') + + const defaulted = findRetrieve(activeMcpTools('core')) + expect(defaulted?.inputSchema.properties).toHaveProperty('rerank') + }) + + it('tools/list reflects project-local availability', async () => { + const unavailableRoot = makeTempRoot() + const availableRoot = makeTempRoot() + try { + const unavailableGraph = writeGraphFixture(unavailableRoot) + const availableGraph = writeGraphFixture(availableRoot) + installTransformersStub(availableRoot) + const { handleStdioRequest } = await import('../../src/runtime/stdio-server.js') + + const listFor = async (graphPath: string) => { + const response = (await Promise.resolve(handleStdioRequest(graphPath, { id: 1, method: 'tools/list' }))) as JsonRpcLine + return response?.result?.tools?.find((tool) => tool.name === 'retrieve') + } + + const gated = await listFor(unavailableGraph) + expect(gated?.inputSchema.properties).not.toHaveProperty('rerank') + expect(gated?.inputSchema.properties).not.toHaveProperty('semantic') + + const open = await listFor(availableGraph) + expect(open?.inputSchema.properties).toHaveProperty('rerank') + expect(open?.inputSchema.properties).toHaveProperty('semantic') + } finally { + rmSync(unavailableRoot, { recursive: true, force: true }) + rmSync(availableRoot, { recursive: true, force: true }) + } + }) + + it('doctor reports semantic availability without affecting 
health', async () => { + const unavailableRoot = makeTempRoot() + const availableRoot = makeTempRoot() + try { + installTransformersStub(availableRoot) + const { buildDoctorReport, runDoctorCommand } = await import('../../src/infrastructure/doctor.js') + + const unavailableReport = buildDoctorReport({ projectDir: unavailableRoot }) + expect(unavailableReport.semantic.available).toBe(false) + expect(unavailableReport.nextCommands.join(' ')).not.toContain('transformers') + + const availableReport = buildDoctorReport({ projectDir: availableRoot }) + expect(availableReport.semantic.available).toBe(true) + + expect(unavailableReport.healthy).toBe(availableReport.healthy) + + const unavailableOutput = runDoctorCommand({ projectDir: unavailableRoot }) + expect(unavailableOutput).toContain('semantic/rerank: unavailable') + expect(unavailableOutput).toContain('@huggingface/transformers') + + const availableOutput = runDoctorCommand({ projectDir: availableRoot }) + expect(availableOutput).toContain('semantic/rerank: available') + } finally { + rmSync(unavailableRoot, { recursive: true, force: true }) + rmSync(availableRoot, { recursive: true, force: true }) + } + }) +}) diff --git a/tests/unit/stdio-semantic.test.ts b/tests/unit/stdio-semantic.test.ts index 377f3a2..22ca5d3 100644 --- a/tests/unit/stdio-semantic.test.ts +++ b/tests/unit/stdio-semantic.test.ts @@ -44,9 +44,18 @@ describe('stdio semantic retrieve', () => { vi.clearAllMocks() }) - it('surfaces semantic options in the retrieve tool schema', async () => { + it('surfaces semantic options in the retrieve tool schema when transformers is installed', async () => { const root = createGraphFixtureRoot() try { + const packageDir = join(root, 'node_modules', '@huggingface', 'transformers') + mkdirSync(packageDir, { recursive: true }) + writeFileSync( + join(packageDir, 'package.json'), + JSON.stringify({ name: '@huggingface/transformers', version: '0.0.0-test', type: 'module', main: 'index.js' }), + 'utf8', + ) + 
writeFileSync(join(packageDir, 'index.js'), 'export async function pipeline() { return async () => [] }\n', 'utf8') + vi.resetModules() const { handleStdioRequest } = await import('../../src/runtime/stdio-server.js') const graphPath = join(root, 'graph.json') @@ -63,4 +72,25 @@ describe('stdio semantic retrieve', () => { rmSync(root, { recursive: true, force: true }) } }) + + it('omits semantic options from the retrieve tool schema when transformers is unavailable', async () => { + const root = createGraphFixtureRoot() + try { + vi.resetModules() + const { handleStdioRequest } = await import('../../src/runtime/stdio-server.js') + const graphPath = join(root, 'graph.json') + const toolsList = await Promise.resolve(handleStdioRequest(graphPath, { id: 1, method: 'tools/list' })) + const retrieveTool = (toolsList?.result as { tools: Array<{ name: string; inputSchema: { properties: Record<string, unknown> } }> }).tools.find( + (tool) => tool.name === 'retrieve', + ) + + expect(retrieveTool?.inputSchema.properties).not.toHaveProperty('semantic') + expect(retrieveTool?.inputSchema.properties).not.toHaveProperty('semantic_model') + expect(retrieveTool?.inputSchema.properties).not.toHaveProperty('rerank') + expect(retrieveTool?.inputSchema.properties).not.toHaveProperty('rerank_model') + expect(retrieveTool?.inputSchema.properties).toHaveProperty('question') + } finally { + rmSync(root, { recursive: true, force: true }) + } + }) }) diff --git a/tests/unit/why-madar-doc.test.ts b/tests/unit/why-madar-doc.test.ts index 57225c7..8270b53 100644 --- a/tests/unit/why-madar-doc.test.ts +++ b/tests/unit/why-madar-doc.test.ts @@ -86,8 +86,9 @@ describe('public marketing copy honesty', () => { }) it('surfaces the current stable release and benchmark evidence pointers in the main README flow', () => { - expect(content).toContain('Current version: `0.28.0`') - expect(content).toContain('0.28.0 changelog') + const packageVersion = (JSON.parse(readDoc('package.json')) as { version: string }).version + 
expect(content).toContain(`Current version: \`${packageVersion}\``) + expect(content).toContain(`${packageVersion} changelog`) expect(content).toContain('madar summary') expect(content).toContain('docs/claims-and-evidence.md') expect(content).toContain('docs/benchmarks/suite/')