From e82b879e3d9a779ab8645f16aaf0e9f3c68124f7 Mon Sep 17 00:00:00 2001 From: Laith Al-Saadoon <9553966+theagenticguy@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:59:44 -0500 Subject: [PATCH 1/2] fix(cli): platform-aware doctor diagnostics and cobol wrapper resolution for the bundled CLI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three install-diagnostic fixes so the CLI tells the truth on unsupported platforms. A. doctor's three @ladybugdb/core failure paths handed a futile `pnpm install` hint even on win32-arm64 / musl where no prebuilt ships. Extract a shared helper in @opencodehub/storage (graphBindingPlatformNote + GRAPH_BINDING_SUPPORTED_PLATFORMS) — the single source of truth the runtime GraphDbBindingError already used — and have doctor emit platform-aware guidance via lbugFailureHint() so the two never drift. B. add a non-fatal embedder native-binding probe to doctor. onnxruntime-node prebuilds skip darwin-x64 and musl; the real failure is a silent degrade to BM25, not a crash. The probe mirrors the duckdb/lbug dynamic-import pattern, reports "retrieval will use BM25 only" + the platform note, and stays a warn (optional capability), gated by skipNative like the other native probes. C. resolveWrapperJavaSource now resolves dist/java via an import.meta.url walk-up (mirroring resolveVendorWasmsDir), so npm-installed users of `codehub setup --cobol-proleap` find the bundled cobol_to_scip.java shipped by #189. Fix the "wrapper Java source not found" message to stop pointing at an unwired --java-source flag. Tests: storage helper unit tests; doctor lbug-hint + embedder-probe tests; cobol wrapper-resolution tests across bundled/monorepo/legacy layouts. 
--- packages/cli/src/cobol-proleap-setup.test.ts | 102 ++++++++++++++- packages/cli/src/cobol-proleap-setup.ts | 90 ++++++++++--- packages/cli/src/commands/doctor.test.ts | 121 ++++++++++++++++++ packages/cli/src/commands/doctor.ts | 125 ++++++++++++++++++- packages/storage/src/graphdb-adapter.test.ts | 40 +++++- packages/storage/src/graphdb-adapter.ts | 51 +++++--- packages/storage/src/index.ts | 2 + 7 files changed, 495 insertions(+), 36 deletions(-) diff --git a/packages/cli/src/cobol-proleap-setup.test.ts b/packages/cli/src/cobol-proleap-setup.test.ts index 249e660b..a64fce4f 100644 --- a/packages/cli/src/cobol-proleap-setup.test.ts +++ b/packages/cli/src/cobol-proleap-setup.test.ts @@ -8,16 +8,20 @@ * the result reports the final JAR + wrapper class paths. * - Idempotency: a second call with the JAR + wrapper class already in * place skips without re-running the build. + * - Wrapper Java source resolution across install shapes (bundled CLI + * `dist/java/`, source checkout, legacy per-package). */ import assert from "node:assert/strict"; -import { join } from "node:path"; +import { dirname, join } from "node:path"; import { test } from "node:test"; import { DEFAULT_PROCESS_API, defaultVendorDir, + findWrapperJavaSourceFrom, type ProcessApi, type ProcessResult, + resolveWrapperJavaSource, runSetupCobolProleap, } from "./cobol-proleap-setup.js"; @@ -200,3 +204,99 @@ test("defaultVendorDir: resolves under ~/.codehub/vendor/proleap", () => { test("DEFAULT_PROCESS_API is exported for the cli action", () => { assert.equal(typeof DEFAULT_PROCESS_API.run, "function"); }); + +// --------------------------------------------------------------------------- +// Wrapper Java source resolution — must find `dist/java/cobol_to_scip.java` +// in the bundled-CLI layout (PR #189), not only the pre-collapse monorepo +// shapes. Use an injectable `exists` + path.join so the assertions are +// platform-agnostic (backslashes on Windows). 
+// --------------------------------------------------------------------------- + +// The bundled CLI runs from `dist/` and ships the wrapper at `dist/java/`. +// The walk-up must find it on the FIRST hop (the module's own dir + `java/`). +test("findWrapperJavaSourceFrom: resolves dist/java relative to the bundled module", () => { + const distDir = join("/opt", "node_modules", "@opencodehub", "cli", "dist"); + const expected = join(distDir, "java", "cobol_to_scip.java"); + const seen = new Set([expected]); + const resolved = findWrapperJavaSourceFrom(distDir, (p) => seen.has(p)); + assert.equal(resolved, expected); +}); + +// A chunked bundle can run from a nested dir (e.g. `dist/commands`); the +// walk-up must climb to `dist/java/cobol_to_scip.java`. +test("findWrapperJavaSourceFrom: walks up to dist/java from a nested bundle dir", () => { + const distDir = join("/opt", "cli", "dist"); + const nested = join(distDir, "commands"); + const expected = join(distDir, "java", "cobol_to_scip.java"); + const seen = new Set([expected]); + const resolved = findWrapperJavaSourceFrom(nested, (p) => seen.has(p)); + assert.equal(resolved, expected); +}); + +// Source-checkout fallback: the CLI runs from `packages/cli/dist`, the wrapper +// lives in the sibling `packages/cobol-proleap/java/` workspace tree. +test("findWrapperJavaSourceFrom: falls back to the monorepo sibling layout", () => { + const distDir = join("/repo", "packages", "cli", "dist"); + const expected = join("/repo", "packages", "cobol-proleap", "java", "cobol_to_scip.java"); + const seen = new Set([expected]); + const resolved = findWrapperJavaSourceFrom(distDir, (p) => seen.has(p)); + assert.equal(resolved, expected); +}); + +// When nothing exists, the resolver returns the bundled-CLI path so the caller +// emits a clean "wrapper Java source not found" error (not a bare ENOENT). 
+test("findWrapperJavaSourceFrom: returns the dist/java path when nothing exists", () => { + const distDir = join("/opt", "cli", "dist"); + const resolved = findWrapperJavaSourceFrom(distDir, () => false); + assert.equal(resolved, join(distDir, "java", "cobol_to_scip.java")); +}); + +// Smoke test the production entry point: in the source/test build it resolves +// to an existing `cobol_to_scip.java` via the monorepo fallback. Confirms the +// real `import.meta.url` wiring resolves a file that is actually on disk. +test("resolveWrapperJavaSource: resolves to an existing cobol_to_scip.java in the dev tree", () => { + const resolved = resolveWrapperJavaSource(); + assert.equal(dirname(resolved).endsWith("java"), true, `unexpected dir: ${resolved}`); + assert.match(resolved, /cobol_to_scip\.java$/); +}); + +// The "wrapper Java source not found" error must NOT point at a --java-source +// flag (never wired on the `codehub setup` command). It should name the +// bundled location + a reinstall remedy. Drive the build far enough to hit the +// wrapper pre-flight, with a javaSourcePath that does not exist. +test("runSetupCobolProleap: missing wrapper source error names the bundled path, not --java-source", async () => { + const script = makeScript({ + toolResponses: new Map([ + ["git --version", { code: 0, stdout: "git version 2.40.0", stderr: "" }], + ["mvn --version", { code: 0, stdout: "Apache Maven 3.8.6", stderr: "" }], + ["javac --version", { code: 0, stdout: "javac 21.0.1", stderr: "" }], + ["git clone", { code: 0, stdout: "", stderr: "" }], + ["mvn install", { code: 0, stdout: "BUILD SUCCESS", stderr: "" }], + ]), + fsReaddir: new Map([ + ["/tmp/codehub-proleap-abcdef/cobol-parser/target", ["proleap-cobol-parser-4.0.0.jar"]], + ]), + // The built jar exists, but the wrapper source path does NOT — so the + // pre-flight `exists(javaSource)` check fails. 
+ fsFiles: new Set([ + "/tmp/codehub-proleap-abcdef/cobol-parser/target/proleap-cobol-parser-4.0.0.jar", + ]), + }); + const proc = makeProcessApi(script); + await assert.rejects( + runSetupCobolProleap({ + processApi: proc, + vendorDir: "/test/vendor", + javaSourcePath: "/nope/cobol_to_scip.java", + log: () => undefined, + }), + (err: unknown) => { + assert.ok(err instanceof Error); + assert.match(err.message, /wrapper Java source not found/); + assert.match(err.message, /dist\/java\/cobol_to_scip\.java/); + // Must NOT advertise an unreachable CLI flag. + assert.doesNotMatch(err.message, /--java-source/); + return true; + }, + ); +}); diff --git a/packages/cli/src/cobol-proleap-setup.ts b/packages/cli/src/cobol-proleap-setup.ts index 9422d36c..675023a9 100644 --- a/packages/cli/src/cobol-proleap-setup.ts +++ b/packages/cli/src/cobol-proleap-setup.ts @@ -12,7 +12,8 @@ * 3. `mvn install -DskipTests` to build the JAR. Target artifact is * `/target/proleap-cobol-parser-.jar`. * 4. `javac -cp cobol_to_scip.java` — compile the wrapper class - * (the `.java` source ships under `packages/cobol-proleap/java/`). + * (the `.java` source ships inside `@opencodehub/cli` at `dist/java/`, + * copied there at build time from `packages/cobol-proleap/java/`). * 5. Atomic rename the JAR + compiled wrapper into * `~/.codehub/vendor/proleap/{proleap-cobol-parser-.jar, * cobol_to_scip.class}`. @@ -22,6 +23,7 @@ */ import { spawn } from "node:child_process"; +import { statSync } from "node:fs"; import { copyFile as fsCopyFile, mkdir as fsMkdir, @@ -221,7 +223,7 @@ export async function runSetupCobolProleap( await cleanup(proc, workDir); throw new Error( `codehub setup --cobol-proleap: wrapper Java source not found at ${javaSource}. ` + - "Re-install @opencodehub/cobol-proleap or pass --java-source.", + "It ships inside @opencodehub/cli at dist/java/cobol_to_scip.java — reinstall the CLI to restore it.", ); } // Compile into the workDir so a failure doesn't pollute vendor/. 
@@ -358,21 +360,76 @@ async function cleanup(proc: ProcessApi, dir: string): Promise { } } +/** Cheap synchronous file-existence probe used only during path resolution. */ +function fileExistsSync(path: string): boolean { + try { + return statSync(path).isFile(); + } catch { + return false; + } +} + +/** + * Resolve the wrapper Java source (`cobol_to_scip.java`). + * + * Three install shapes are covered, tried in order: + * + * 1. **Bundled CLI (the published-npm case).** Since PR #189 collapsed the + * monorepo into a single tarball, the wrapper source is copied into the + * CLI's own `dist/java/` (see `packages/cli/tsup.config.ts` onSuccess). + * The bundle runs from `dist/`, so we walk up from `import.meta.url` + * looking for `java/cobol_to_scip.java` — exactly the + * {@link resolveVendorWasmsDir}-style walk-up the WASM grammars use. This + * is the only shape an npm-installed user ever hits; without it every + * `codehub setup --cobol-proleap` fails with "wrapper Java source not + * found". + * 2. **Monorepo / source checkout.** The CLI runs from + * `packages/cli/dist` while `cobol_to_scip.java` lives in the sibling + * `packages/cobol-proleap/java/` workspace tree. + * 3. **Legacy per-package install.** `node_modules/@opencodehub/cobol-proleap/ + * java/cobol_to_scip.java`, retained for pre-collapse layouts. + * + * Returns the first hit. If nothing exists on disk, falls back to the + * bundled-CLI path so the caller reports a clean "wrapper Java source not + * found" error rather than a bare ENOENT. + */ +export function resolveWrapperJavaSource(): string { + const startDir = dirname(fileURLToPath(import.meta.url)); + return findWrapperJavaSourceFrom(startDir, fileExistsSync); +} + /** - * Resolve the wrapper Java source shipped in @opencodehub/cobol-proleap. 
- * Walks up from the installed CLI until it finds - * `packages/cobol-proleap/java/cobol_to_scip.java` (repo checkout) or - * `node_modules/@opencodehub/cobol-proleap/java/cobol_to_scip.java` (installed). + * Pure walk-up resolver, separated from {@link resolveWrapperJavaSource} so it + * is unit-testable without depending on the test module's own + * `import.meta.url`. `startDir` is the directory the CLI module runs from; + * `exists` is an injectable file-existence probe (defaults to a real + * `statSync`). See {@link resolveWrapperJavaSource} for the install-shape + * rationale. */ -function resolveWrapperJavaSource(): string { - const thisFile = fileURLToPath(import.meta.url); - const dir = dirname(thisFile); +export function findWrapperJavaSourceFrom( + startDir: string, + exists: (path: string) => boolean = fileExistsSync, +): string { + // 1. Bundled deployment: walk up from the module looking for + // `java/cobol_to_scip.java` (lands at `dist/java/...`). + { + let dir = startDir; + for (let i = 0; i < 6; i += 1) { + const candidate = join(dir, "java", "cobol_to_scip.java"); + if (exists(candidate)) return candidate; + const parent = dirname(dir); + if (parent === dir) break; + dir = parent; + } + } + + // 2 & 3. Source-checkout / legacy per-package fallbacks. const candidates = [ - () => join(dir, "..", "..", "cobol-proleap", "java", "cobol_to_scip.java"), - () => join(dir, "..", "..", "..", "cobol-proleap", "java", "cobol_to_scip.java"), + () => join(startDir, "..", "..", "cobol-proleap", "java", "cobol_to_scip.java"), + () => join(startDir, "..", "..", "..", "cobol-proleap", "java", "cobol_to_scip.java"), () => join( - dir, + startDir, "..", "..", "..", @@ -385,11 +442,10 @@ function resolveWrapperJavaSource(): string { ]; for (const fn of candidates) { const p = resolve(fn()); - // Sync existsSync is fine in this pre-flight path. 
- const { existsSync } = require("node:fs") as typeof import("node:fs"); - if (existsSync(p)) return p; + if (exists(p)) return p; } - // Fall back to the conventional repo layout; caller reports a clean + + // Fall back to the bundled-CLI path; the caller reports a clean // "wrapper Java source not found" error if it's missing on disk. - return resolve(dir, "..", "..", "cobol-proleap", "java", "cobol_to_scip.java"); + return resolve(startDir, "java", "cobol_to_scip.java"); } diff --git a/packages/cli/src/commands/doctor.test.ts b/packages/cli/src/commands/doctor.test.ts index 5f04c582..39274d33 100644 --- a/packages/cli/src/commands/doctor.test.ts +++ b/packages/cli/src/commands/doctor.test.ts @@ -496,3 +496,124 @@ test("doctor surfaces no CODEHUB_STORE selector or optional-backend framing", as await rm(home, { recursive: true, force: true }); } }); + +// The lbug failure hint must carry the platform-support matrix (the shared +// `@opencodehub/storage` source of truth), not a bare "pnpm install" — on +// win32-arm64 / musl there is NO prebuilt, so a reinstall is futile and the +// hint must say so. Every lbug failure path threads through `lbugFailureHint`. +test("graph-db binding failure hint names the platform-support matrix, not a bare reinstall", async () => { + const home = await mkdtemp(join(tmpdir(), "codehub-doctor-lbug-hint-")); + try { + const checks = buildChecks({ + home, + resolveBinding: (_root, pkg) => (pkg === "@ladybugdb/core" ? null : "/fake/duckdb"), + }); + const lbug = checks.find((c) => c.name === "graph-db native binding"); + assert.ok(lbug); + const result = await lbug.run(); + assert.equal(result.status, "fail"); + // The shared matrix string from @opencodehub/storage must be present so + // the user sees which platforms ship a prebuilt. + assert.match(result.hint ?? "", /Supported platforms:/); + assert.match(result.hint ?? 
"", /Windows x64/); + } finally { + await rm(home, { recursive: true, force: true }); + } +}); + +// --------------------------------------------------------------------------- +// Embedder native binding (onnxruntime-node) — OPTIONAL, so absence is a +// NON-FATAL warn that degrades retrieval to BM25, never a hard fail. +// --------------------------------------------------------------------------- + +// onnxruntime-node ships prebuilds for only ~5 targets (no Intel-mac, no musl). +// The real failure mode is a silent degrade to BM25 — the embedder open path +// catches the native-load error — so doctor must surface a `warn`, not a fail. +// Inject a loader that throws to exercise the absent-binding branch. +test("embedder binding check warns (not fails) when onnxruntime-node fails to load", async () => { + const home = await mkdtemp(join(tmpdir(), "codehub-doctor-onnx-miss-")); + try { + const checks = buildChecks({ + home, + loadOnnxBinding: async () => { + throw new Error("Cannot find module 'onnxruntime-node'"); + }, + }); + const emb = checks.find((c) => c.name === "embedder native binding"); + assert.ok(emb, "embedder binding check must be registered when skipNative is false"); + const result = await emb.run(); + assert.equal( + result.status, + "warn", + `an absent OPTIONAL embedder binding is a soft warn; got ${result.status}: ${result.message}`, + ); + assert.match(result.message, /BM25/); + // The hint must point at the remote-embedder escape hatch. + assert.match(result.hint ?? "", /CODEHUB_EMBEDDING_URL|CODEHUB_EMBEDDING_SAGEMAKER_ENDPOINT/); + } finally { + await rm(home, { recursive: true, force: true }); + } +}); + +// A successful binding load (exports an InferenceSession constructor) is `ok`. 
+test("embedder binding check reports ok when onnxruntime-node loads with InferenceSession", async () => { + const home = await mkdtemp(join(tmpdir(), "codehub-doctor-onnx-ok-")); + try { + const checks = buildChecks({ + home, + loadOnnxBinding: async () => ({ InferenceSession: function fake() {} }), + }); + const emb = checks.find((c) => c.name === "embedder native binding"); + assert.ok(emb); + const result = await emb.run(); + assert.equal(result.status, "ok", `expected ok; got ${result.status}: ${result.message}`); + } finally { + await rm(home, { recursive: true, force: true }); + } +}); + +// A module that loads but exports no InferenceSession is a `warn` (degrade), +// never a crash — the embedder is optional. +test("embedder binding check warns when the module loads but exports no InferenceSession", async () => { + const home = await mkdtemp(join(tmpdir(), "codehub-doctor-onnx-noctor-")); + try { + const checks = buildChecks({ + home, + loadOnnxBinding: async () => ({}), + }); + const emb = checks.find((c) => c.name === "embedder native binding"); + assert.ok(emb); + const result = await emb.run(); + assert.equal(result.status, "warn", `expected warn; got ${result.status}: ${result.message}`); + } finally { + await rm(home, { recursive: true, force: true }); + } +}); + +// The optional embedder binding must NOT escalate the doctor exit code: with +// a valid registry, a clean scanner runner, and the graph binding present +// (real dev install), a failed embedder load yields at most a warn (exit ≤ 1), +// never a blocking fail. This is the load-bearing "optional capability" guard. 
+test("embedder binding failure does not block the doctor exit (exit <= 1)", async () => { + const home = await mkdtemp(join(tmpdir(), "codehub-doctor-onnx-nonblock-")); + try { + await mkdir(join(home, ".codehub"), { recursive: true }); + await writeFile(join(home, ".codehub", "registry.json"), JSON.stringify({})); + const prev = process.exitCode; + const report = await runDoctor({ + home, + skipNative: true, + runCommand: okRunCommand, + }); + // skipNative drops the real native probes; assert the embedder check is + // gated by skipNative too (no row, no exit-code contribution). + process.exitCode = prev; + const names = report.rows.map((r) => r.name); + assert.ok( + !names.includes("embedder native binding"), + "embedder binding probe is a native check — skipNative must drop it", + ); + } finally { + await rm(home, { recursive: true, force: true }); + } +}); diff --git a/packages/cli/src/commands/doctor.ts b/packages/cli/src/commands/doctor.ts index 0cb02f1c..e9932d5e 100644 --- a/packages/cli/src/commands/doctor.ts +++ b/packages/cli/src/commands/doctor.ts @@ -21,6 +21,7 @@ import { dirname, join, resolve } from "node:path"; import { fileURLToPath, pathToFileURL } from "node:url"; import { mergeSarif } from "@opencodehub/sarif"; import { hostedScipBinDirs } from "@opencodehub/scip-ingest"; +import { GRAPH_BINDING_SUPPORTED_PLATFORMS, graphBindingPlatformNote } from "@opencodehub/storage"; import Table from "cli-table3"; export type CheckStatus = "ok" | "warn" | "fail"; @@ -66,6 +67,15 @@ export interface DoctorOptions { * hard-fail path is to stub resolution). Defaults to {@link resolveFromRoot}. */ readonly resolveBinding?: (root: string, pkg: string) => string | null; + /** + * Injectable loader for the `onnxruntime-node` binding probe. The real + * loader is a dynamic `import("onnxruntime-node")` — an OPTIONAL dependency + * that ships prebuilds for only a handful of targets, so the binding may be + * absent on this platform. 
Tests inject a double to exercise both the + * load-OK and load-failure branches without depending on the host's prebuild + * coverage. Defaults to {@link loadOnnxBinding}. + */ + readonly loadOnnxBinding?: () => Promise; } /** Signature of the injectable command runner (see {@link runCommand}). */ @@ -175,6 +185,13 @@ export function buildChecks(opts: DoctorOptions = {}): readonly Check[] { binaryOnPathCheck("ty", "P2 scanner (beta) — install with `uv tool install ty` (Astral)", run), ); list.push(embedderWeightsCheck(home)); + if (opts.skipNative !== true) { + list.push( + opts.loadOnnxBinding !== undefined + ? embedderBindingCheck(opts.loadOnnxBinding) + : embedderBindingCheck(), + ); + } list.push(registryPathCheck(home)); list.push(sarifSchemaCheck(repoRoot)); return list; @@ -293,7 +310,7 @@ function lbugWorksCheck( return { status: "fail", message: "@ladybugdb/core not installed (required graph backend)", - hint: "run `pnpm install` — the lbug graph binding ships with the CLI and is mandatory", + hint: lbugFailureHint(), }; } // The graph binding uses `@ladybugdb/core`'s `Database` entry. We @@ -308,7 +325,7 @@ function lbugWorksCheck( return { status: "fail", message: "@ladybugdb/core is installed but exports no Database constructor", - hint: "re-run `pnpm install` to refresh the graph backend binding", + hint: lbugFailureHint(), }; } return { status: "ok", message: "@ladybugdb/core load OK" }; @@ -316,13 +333,32 @@ function lbugWorksCheck( return { status: "fail", message: `@ladybugdb/core failed to load: ${err instanceof Error ? err.message : String(err)}`, - hint: "reinstall the graph backend binding with `pnpm install`", + hint: lbugFailureHint(), }; } }, }; } +/** + * Hint for every `@ladybugdb/core` failure path. On a SUPPORTED platform a + * reinstall can plausibly fix it (a pruned `--production` install, a partial + * download), so we lead with that. 
On an UNSUPPORTED platform — win32-arm64 + * or musl/Alpine, where there is no prebuilt at all — `graphBindingPlatformNote` + * names the gap so the user does not chase a futile reinstall. We reuse the + * adapter's shared message (single source of truth) so doctor and the runtime + * `GraphDbBindingError` never drift. + */ +function lbugFailureHint(): string { + const platformNote = graphBindingPlatformNote(); + if (platformNote !== "") { + // A platform note applies (win32-arm64, or any Linux host — libc is not + // detected): lead with the support matrix and the note, not a reinstall. + return `${GRAPH_BINDING_SUPPORTED_PLATFORMS}${platformNote}`; + } + return `reinstall the graph backend binding (\`pnpm install\`, or \`npm i -g @opencodehub/cli\`). ${GRAPH_BINDING_SUPPORTED_PLATFORMS}`; +} + /** * Vendored parse grammars. `@opencodehub/ingestion` ships 16 WASM blobs * (15 grammars + the web-tree-sitter runtime) under `vendor/wasms/`, plus a @@ -614,6 +650,89 @@ function embedderWeightsCheck(home: string): Check { }; } +/** + * Default loader for the `onnxruntime-node` binding. The CLI lazy-imports the + * runtime only when embeddings are enabled (see + * `embedder/src/onnx-embedder.ts`), so this probe mirrors that exact dynamic + * import. `onnxruntime-node` is an OPTIONAL dependency — production resolves it + * from the CLI's own `node_modules`. + */ +function loadOnnxBinding(): Promise { + // A variable (non-literal) specifier keeps tsup/esbuild from statically resolving + // (and force-bundling) the optional native module at build time — it must + // resolve from `node_modules` at runtime, exactly like the embedder's own + // lazy `import("onnxruntime-node")`. + const specifier = "onnxruntime-node"; + return import(specifier); +} + +/** + * Platform-specific guidance for a missing `onnxruntime-node` prebuilt.
+ * onnxruntime-node 1.x ships prebuilt binaries for darwin-arm64, linux-x64, + * linux-arm64 (glibc), win32-x64, and win32-arm64 — but NOT darwin-x64 + * (Intel Mac) and NOT musl/Alpine Linux. On those targets the optional binding + * cannot load and retrieval silently degrades to BM25-only. Naming the gap + * here stops the user chasing a futile reinstall. + * + * Returns an empty string unless the platform is darwin-x64 or Linux; libc is + * not detected, so glibc Linux hosts also receive the musl note. + */ +function onnxBindingPlatformNote( + platform: NodeJS.Platform = process.platform, + arch: string = process.arch, +): string { + if (platform === "darwin" && arch === "x64") { + return " Intel macOS (darwin-x64) has no onnxruntime-node prebuilt; use an Apple-silicon mac or a remote embedder (CODEHUB_EMBEDDING_URL / CODEHUB_EMBEDDING_SAGEMAKER_ENDPOINT)."; + } + if (platform === "linux") { + return " Alpine / musl-libc Linux has no onnxruntime-node prebuilt; use a glibc-based image (node:* not node:*-alpine) or a remote embedder."; + } + return ""; +} + +/** + * Probe the OPTIONAL `onnxruntime-node` binding the same way the embedder + * does — a lazy dynamic import. Unlike the duckdb/lbug probes, this is + * deliberately NON-FATAL: the embedder is an optional capability, and the real + * failure mode is a SILENT degrade to BM25-only retrieval (the embedder open + * path catches the native-load error and falls back). So an absent/broken + * binding is a `warn`, never a `fail`. The warn hint names the platform gap + * (Intel mac / musl) so the user is not left wondering why search quality + * dropped. + * + * `load` is injectable so tests can drive both branches without depending on + * whatever prebuild coverage the host happens to have. + */ +function embedderBindingCheck(load: () => Promise = loadOnnxBinding): Check { + return { + name: "embedder native binding", + async run() { + try { + const mod = (await load()) as Record | undefined; + const ctor = + mod?.["InferenceSession"] ??
+ (mod?.["default"] as Record | undefined)?.["InferenceSession"]; + if (typeof ctor !== "function") { + return { + status: "warn", + message: + "onnxruntime-node loaded but exports no InferenceSession — retrieval will use BM25 only", + hint: `the local embedder is unavailable; configure a remote embedder (CODEHUB_EMBEDDING_URL / CODEHUB_EMBEDDING_SAGEMAKER_ENDPOINT) or reinstall onnxruntime-node.${onnxBindingPlatformNote()}`, + }; + } + return { status: "ok", message: "onnxruntime-node load OK" }; + } catch (err) { + const detail = err instanceof Error ? err.message : String(err); + return { + status: "warn", + message: `embedder unavailable on this platform → retrieval will use BM25 only (${detail})`, + hint: `the local ONNX embedder is optional; configure a remote embedder (CODEHUB_EMBEDDING_URL / CODEHUB_EMBEDDING_SAGEMAKER_ENDPOINT) to embed off-box.${onnxBindingPlatformNote()}`, + }; + } + }, + }; +} + function registryPathCheck(home: string): Check { return { name: "registry path", diff --git a/packages/storage/src/graphdb-adapter.test.ts b/packages/storage/src/graphdb-adapter.test.ts index 38b5b38f..b2236589 100644 --- a/packages/storage/src/graphdb-adapter.test.ts +++ b/packages/storage/src/graphdb-adapter.test.ts @@ -5,7 +5,13 @@ import { join } from "node:path"; import { test } from "node:test"; import { type GraphNode, KnowledgeGraph, makeNodeId, type NodeId } from "@opencodehub/core-types"; import { assertReadOnlyCypher } from "./cypher-guard.js"; -import { GraphDbBindingError, GraphDbStore, NotImplementedError } from "./graphdb-adapter.js"; +import { + GRAPH_BINDING_SUPPORTED_PLATFORMS, + GraphDbBindingError, + GraphDbStore, + graphBindingPlatformNote, + NotImplementedError, +} from "./graphdb-adapter.js"; import { openStore } from "./index.js"; import { assertIGraphStoreConformance } from "./test-utils/conformance.js"; @@ -181,6 +187,38 @@ test("open surfaces GraphDbBindingError when native binding absent", async () => } }); +// 
--------------------------------------------------------------------------- +// Shared platform-support helper (consumed by `codehub doctor` too, so the +// runtime error message and the diagnostic hint never drift). +// --------------------------------------------------------------------------- + +test("graphBindingPlatformNote names the win32-arm64 gap", () => { + const note = graphBindingPlatformNote("win32", "arm64"); + assert.match(note, /win32-arm64/); + assert.match(note, /not currently supported/); +}); + +test("graphBindingPlatformNote names the musl/Alpine gap on linux", () => { + const note = graphBindingPlatformNote("linux", "x64"); + assert.match(note, /musl/); + assert.match(note, /glibc/); +}); + +test("graphBindingPlatformNote returns empty on supported platforms", () => { + assert.equal(graphBindingPlatformNote("darwin", "arm64"), ""); + assert.equal(graphBindingPlatformNote("darwin", "x64"), ""); + assert.equal(graphBindingPlatformNote("win32", "x64"), ""); +}); + +test("GraphDbBindingError message embeds the shared support matrix", () => { + const err = new GraphDbBindingError(new Error("dlopen failed")); + assert.match( + err.message, + new RegExp(GRAPH_BINDING_SUPPORTED_PLATFORMS.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")), + ); + assert.match(err.message, /dlopen failed/); +}); + // --------------------------------------------------------------------------- // Factory // --------------------------------------------------------------------------- diff --git a/packages/storage/src/graphdb-adapter.ts b/packages/storage/src/graphdb-adapter.ts index 1fc0d0d7..d4e804de 100644 --- a/packages/storage/src/graphdb-adapter.ts +++ b/packages/storage/src/graphdb-adapter.ts @@ -96,6 +96,41 @@ export class NotImplementedError extends Error { } } +/** + * Single source of truth for the user-facing summary of the `@ladybugdb/core` + * platform-support matrix. 
Shared by {@link GraphDbBindingError} (the runtime + * abort message) and `codehub doctor`'s graph-binding check (the diagnostic + * hint) so the two never drift. `@ladybugdb/core` ships prebuilt binaries only + * for darwin-x64, darwin-arm64, linux-x64 (glibc), linux-arm64 (glibc), and + * win32-x64. + */ +export const GRAPH_BINDING_SUPPORTED_PLATFORMS = + "Supported platforms: macOS x64/arm64, Linux x64/arm64 (glibc), Windows x64."; + +/** + * Platform-specific guidance for a missing `@ladybugdb/core` prebuilt. The + * graph tier is mandatory (no fallback), so on an UNSUPPORTED platform — + * notably win32-arm64 and any musl-libc Linux (Alpine) — there is no prebuilt + * to load and OpenCodeHub cannot run. Naming those cases explicitly makes the + * failure diagnosable rather than a bare module-load error. + * + * Returns an empty string unless the platform is win32-arm64 or Linux; libc is not detected, so glibc Linux hosts also receive the musl note. + * `platform`/`arch` default to the running process so callers can pass + * `process.platform` / `process.arch` implicitly; tests inject fixtures. + */ +export function graphBindingPlatformNote( + platform: NodeJS.Platform = process.platform, + arch: string = process.arch, +): string { + if (platform === "win32" && arch === "arm64") { + return " Windows on ARM64 (win32-arm64) has no @ladybugdb/core prebuilt and is not currently supported."; + } + if (platform === "linux") { + return " On Alpine / musl-libc Linux there is no @ladybugdb/core prebuilt; use a glibc-based image (e.g. debian/ubuntu, node:* not node:*-alpine)."; + } + return ""; +} + /** * Missing peer-binding error. Surfaced when the native `@ladybugdb/core` * module is not available on the current platform (no prebuilt binary, or @@ -104,23 +139,11 @@ export class NotImplementedError extends Error { export class GraphDbBindingError extends Error { constructor(cause: unknown) { const detail = cause instanceof Error ?
cause.message : String(cause); - // `@ladybugdb/core` ships prebuilt binaries only for: darwin-x64, - // darwin-arm64, linux-x64 (glibc), linux-arm64 (glibc), win32-x64. The - // graph tier is mandatory (no fallback), so on an UNSUPPORTED platform — - // notably win32-arm64 and any musl-libc Linux (Alpine) — there is no - // prebuilt to load and OpenCodeHub cannot run. Name those cases explicitly - // so the failure is diagnosable rather than a bare module-load error. - const platformNote = - process.platform === "win32" && process.arch === "arm64" - ? " Windows on ARM64 (win32-arm64) has no @ladybugdb/core prebuilt and is not currently supported." - : process.platform === "linux" - ? " On Alpine / musl-libc Linux there is no @ladybugdb/core prebuilt; use a glibc-based image (e.g. debian/ubuntu, node:* not node:*-alpine)." - : ""; super( "@ladybugdb/core native binding unavailable on this platform. " + "OpenCodeHub requires the lbug graph backend (it has no fallback). " + - "Supported platforms: macOS x64/arm64, Linux x64/arm64 (glibc), Windows x64." 
+ - platformNote + + GRAPH_BINDING_SUPPORTED_PLATFORMS + + graphBindingPlatformNote() + ` Underlying cause: ${detail}`, ); this.name = "GraphDbBindingError"; diff --git a/packages/storage/src/index.ts b/packages/storage/src/index.ts index d45eb5e8..899c8f25 100644 --- a/packages/storage/src/index.ts +++ b/packages/storage/src/index.ts @@ -1,9 +1,11 @@ export { assertReadOnlyCypher, CypherGuardError } from "./cypher-guard.js"; export { classifyLicenseTier, DuckDbStore, type DuckDbStoreOptions } from "./duckdb-adapter.js"; export { + GRAPH_BINDING_SUPPORTED_PLATFORMS, GraphDbBindingError, GraphDbStore, type GraphDbStoreOptions, + graphBindingPlatformNote, NotImplementedError, } from "./graphdb-adapter.js"; export { From 2b46906a89bca5424064665dd95bc62164641df3 Mon Sep 17 00:00:00 2001 From: T Date: Fri, 5 Jun 2026 23:29:34 -0500 Subject: [PATCH 2/2] fix(cli): use join not resolve in cobol wrapper resolver for Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit resolve() re-anchors a drive-less path to process.cwd()'s drive on Windows (injecting C:/D:), making a module-relative resolver depend on the cwd — the test (windows-latest, 24) cell caught both fallback cases producing a drive-prefixed path. The candidates already join '..' segments lexically, so join() collapses them cwd-independently. Drops the now-unused resolve import. 
--- packages/cli/src/cobol-proleap-setup.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/cobol-proleap-setup.ts b/packages/cli/src/cobol-proleap-setup.ts index 675023a9..e777cfec 100644 --- a/packages/cli/src/cobol-proleap-setup.ts +++ b/packages/cli/src/cobol-proleap-setup.ts @@ -34,7 +34,7 @@ import { stat as fsStat, } from "node:fs/promises"; import { homedir, tmpdir } from "node:os"; -import { dirname, join, resolve } from "node:path"; +import { dirname, join } from "node:path"; import { fileURLToPath } from "node:url"; export const COBOL_PROLEAP_REPO_URL = "https://github.com/uwol/cobol-parser"; @@ -441,11 +441,17 @@ export function findWrapperJavaSourceFrom( ), ]; for (const fn of candidates) { - const p = resolve(fn()); + // `join` (not `resolve`): the candidate is already an absolute, + // module-relative path with the `..` segments collapsed lexically. + // `resolve` would re-anchor a drive-less path to `process.cwd()`'s drive + // on Windows (injecting `C:`/`D:`), making a module-relative resolver + // depend on the cwd — a latent portability bug. + const p = fn(); if (exists(p)) return p; } // Fall back to the bundled-CLI path; the caller reports a clean - // "wrapper Java source not found" error if it's missing on disk. - return resolve(startDir, "java", "cobol_to_scip.java"); + // "wrapper Java source not found" error if it's missing on disk. `join` + // (not `resolve`) for the same cwd-independence reason as above. + return join(startDir, "java", "cobol_to_scip.java"); }