Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions plugins/orchestrator/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ node_modules/
*.db-wal
*.db-shm
.sidecar-port
.sidecar.log
47 changes: 39 additions & 8 deletions plugins/orchestrator/dist/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -22573,11 +22573,12 @@ function registerSessionOnce(sessionId) {
}
var sidecarStatus = "starting";
var sidecarError = null;
async function trySpawn(cmd, portFile, label, timeoutMs) {
var sidecarLogPath = null;
async function trySpawn(cmd, portFile, label, timeoutMs, logFd) {
try {
const proc = Bun.spawn(cmd, {
stdout: "ignore",
stderr: "ignore"
stdout: logFd >= 0 ? logFd : "ignore",
stderr: logFd >= 0 ? logFd : "ignore"
});
const maxAttempts = Math.ceil(timeoutMs / 2000);
let port = null;
Expand Down Expand Up @@ -22619,6 +22620,17 @@ async function startSidecar() {
const sidecarPath = resolve(pluginRoot, "sidecar/embed_server.py");
const requirementsPath = resolve(pluginRoot, "sidecar/requirements.txt");
const portFile = resolve(pluginRoot, ".sidecar-port");
sidecarLogPath = resolve(pluginRoot, ".sidecar.log");
const { openSync } = await import("fs");
let logFd = -1;
try {
logFd = openSync(sidecarLogPath, "w");
} catch {}
const envTimeoutRaw = process.env.ORCH_SIDECAR_BOOT_TIMEOUT_MS;
const envTimeout = envTimeoutRaw ? parseInt(envTimeoutRaw, 10) : NaN;
const envOverride = !isNaN(envTimeout) && envTimeout > 0 ? envTimeout : null;
const uvxTimeoutMs = envOverride ?? 180000;
const pythonTimeoutMs = envOverride ?? 30000;
try {
const content = await Bun.file(portFile).text();
const existingPort = parseInt(content.trim(), 10);
Expand All @@ -22635,20 +22647,20 @@ async function startSidecar() {
unlinkSync(portFile);
} catch {}
const baseArgs = ["--port", "0", "--port-file", portFile];
let result = await trySpawn(["uvx", "--with-requirements", requirementsPath, "python", sidecarPath, ...baseArgs], portFile, "uvx", 60000);
let result = await trySpawn(["uvx", "--with-requirements", requirementsPath, "python", sidecarPath, ...baseArgs], portFile, "uvx", uvxTimeoutMs, logFd);
if (!result) {
try {
const { unlinkSync } = await import("fs");
unlinkSync(portFile);
} catch {}
result = await trySpawn(["python", sidecarPath, ...baseArgs], portFile, "python", 30000);
result = await trySpawn(["python", sidecarPath, ...baseArgs], portFile, "python", pythonTimeoutMs, logFd);
}
if (!result) {
try {
const { unlinkSync } = await import("fs");
unlinkSync(portFile);
} catch {}
result = await trySpawn(["python3", sidecarPath, ...baseArgs], portFile, "python3", 30000);
result = await trySpawn(["python3", sidecarPath, ...baseArgs], portFile, "python3", pythonTimeoutMs, logFd);
}
if (!result) {
let hasUv = false;
Expand Down Expand Up @@ -22753,6 +22765,9 @@ server.tool("system_status", "Check the health of the orchestrator system: embed
if (sidecarError) {
lines.push(` - Reason: ${sidecarError}`);
}
if (sidecarLogPath) {
lines.push(` - Sidecar log: ${sidecarLogPath} (truncated on each boot attempt)`);
}
lines.push(" - To enable: call `install_embeddings` tool, or manually install uv (https://docs.astral.sh/uv/)");
}
lines.push(`- **Active sessions** (24h): ${activeSessions}`);
Expand Down Expand Up @@ -22806,11 +22821,19 @@ server.tool("install_embeddings", "Check and install dependencies needed for sem
checks4.uvPath = stdout.trim();
}
} catch {}
let modelCached = false;
try {
const { existsSync: existsSync4 } = await import("fs");
const { homedir: homedir2 } = await import("os");
const hubRoot = process.env.HF_HUB_CACHE ? process.env.HF_HUB_CACHE : resolve(process.env.HF_HOME || resolve(homedir2(), ".cache", "huggingface"), "hub");
modelCached = existsSync4(resolve(hubRoot, "models--BAAI--bge-m3"));
} catch {}
if (action === "check") {
lines.push("## Embedding Dependencies Check");
lines.push("");
lines.push(`- Python: ${checks4.python ? `installed (${checks4.pythonPath})` : "NOT FOUND"}`);
lines.push(`- uv: ${checks4.uv ? `installed (${checks4.uvPath})` : "NOT FOUND"}`);
lines.push(`- bge-m3 model cache: ${modelCached ? "present (~10s boot expected)" : "not yet downloaded (~2 GB on first boot)"}`);
lines.push(`- Sidecar: ${sidecarStatus}`);
lines.push("");
if (checks4.python && checks4.uv) {
Expand Down Expand Up @@ -22850,7 +22873,11 @@ server.tool("install_embeddings", "Check and install dependencies needed for sem
lines.push("Sidecar started successfully! Semantic search is now active.");
lines.push("Backfilling embeddings for existing notes in the background.");
} else {
lines.push("Sidecar failed to start. Check the logs for details.");
lines.push(sidecarLogPath ? `Sidecar failed to start. Check ${sidecarLogPath} for diagnostics.` : "Sidecar failed to start.");
if (!modelCached) {
lines.push("First-run downloads (~2 GB bge-m3 model + onnxruntime wheels) can exceed the default 180s timeout on slow connections.");
lines.push("Override with `ORCH_SIDECAR_BOOT_TIMEOUT_MS=600000` (ms) and call `install_embeddings(install)` again. Subsequent boots are ~10s once cached.");
}
}
return { content: [{ type: "text", text: lines.join(`
`) }] };
Expand All @@ -22876,7 +22903,11 @@ server.tool("install_embeddings", "Check and install dependencies needed for sem
lines.push("Sidecar started! Semantic search is now active.");
lines.push("First run will download the bge-m3 model (~1.5GB). This happens once and is cached.");
} else {
lines.push("uv installed but sidecar didn't start. Try restarting the session.");
lines.push(sidecarLogPath ? `uv installed but sidecar didn't start. Check ${sidecarLogPath} for diagnostics.` : "uv installed but sidecar didn't start. Try restarting the session.");
if (!modelCached) {
lines.push("First-run downloads (~2 GB bge-m3 model + onnxruntime wheels) can exceed the default 180s timeout on slow connections.");
lines.push("Override with `ORCH_SIDECAR_BOOT_TIMEOUT_MS=600000` (ms) and call `install_embeddings(install)` again.");
}
}
} else {
const stderr = await new Response(proc.stderr).text();
Expand Down
73 changes: 66 additions & 7 deletions plugins/orchestrator/mcp/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -97,17 +97,21 @@ function registerSessionOnce(sessionId: string): void {
}
// Current sidecar lifecycle state; begins as "starting". Reported verbatim on
// the "Sidecar:" line of the install_embeddings check output.
let sidecarStatus: "ready" | "starting" | "unavailable" | "error" = "starting";
// Optional failure detail; null initially. When set, system_status prints it
// as a "Reason:" line.
let sidecarError: string | null = null;
// Path to the sidecar boot/runtime log; null initially. Assigned at the top of
// startSidecar so system_status and install_embeddings can surface it when the
// spawn fails.
let sidecarLogPath: string | null = null;

async function trySpawn(
cmd: string[],
portFile: string,
label: string,
timeoutMs: number,
logFd: number,
): Promise<{ proc: ReturnType<typeof Bun.spawn>; port: number } | null> {
try {
const proc = Bun.spawn(cmd, {
stdout: "ignore",
stderr: "ignore",
stdout: logFd >= 0 ? logFd : "ignore",
stderr: logFd >= 0 ? logFd : "ignore",
});

// Wait for port file to appear, polling every 2s up to timeoutMs
Expand Down Expand Up @@ -155,6 +159,28 @@ async function startSidecar(): Promise<EmbeddingClient | null> {
const sidecarPath = resolve(pluginRoot, "sidecar/embed_server.py");
const requirementsPath = resolve(pluginRoot, "sidecar/requirements.txt");
const portFile = resolve(pluginRoot, ".sidecar-port");
sidecarLogPath = resolve(pluginRoot, ".sidecar.log");

// Open the log file (truncating it) so spawned sidecars can write stdout/stderr
// to it. This replaces the previous stdio: "ignore", which made cold-start
// failures (slow connection + 60s timeout + ~2 GB bge-m3 download) impossible
// to diagnose. Falls back to "ignore" if the open fails — never blocks a boot.
const { openSync } = await import("node:fs");
let logFd = -1;
try {
logFd = openSync(sidecarLogPath, "w");
} catch {
// Open failed — proceed with stdio: "ignore" behavior (preserves prior path).
}

// Boot timeout — first run must download ~2 GB of bge-m3 model + onnxruntime
// wheels, which on residential connections exceeds the previous 60s default.
// ORCH_SIDECAR_BOOT_TIMEOUT_MS overrides for slower links (or impatient ones).
const envTimeoutRaw = process.env.ORCH_SIDECAR_BOOT_TIMEOUT_MS;
const envTimeout = envTimeoutRaw ? parseInt(envTimeoutRaw, 10) : NaN;
const envOverride = !isNaN(envTimeout) && envTimeout > 0 ? envTimeout : null;
const uvxTimeoutMs = envOverride ?? 180_000;
const pythonTimeoutMs = envOverride ?? 30_000;

// Reuse an existing healthy sidecar if one is already running. Each Claude
// session spawns its own MCP server process, so without reuse we end up with
Expand Down Expand Up @@ -192,7 +218,8 @@ async function startSidecar(): Promise<EmbeddingClient | null> {
["uvx", "--with-requirements", requirementsPath, "python", sidecarPath, ...baseArgs],
portFile,
"uvx",
60000,
uvxTimeoutMs,
logFd,
);

// Fall back to direct python
Expand All @@ -203,7 +230,8 @@ async function startSidecar(): Promise<EmbeddingClient | null> {
["python", sidecarPath, ...baseArgs],
portFile,
"python",
30000,
pythonTimeoutMs,
logFd,
);
}

Expand All @@ -214,7 +242,8 @@ async function startSidecar(): Promise<EmbeddingClient | null> {
["python3", sidecarPath, ...baseArgs],
portFile,
"python3",
30000,
pythonTimeoutMs,
logFd,
);
}

Expand Down Expand Up @@ -363,6 +392,9 @@ server.tool(
if (sidecarError) {
lines.push(` - Reason: ${sidecarError}`);
}
if (sidecarLogPath) {
lines.push(` - Sidecar log: ${sidecarLogPath} (truncated on each boot attempt)`);
}
lines.push(" - To enable: call `install_embeddings` tool, or manually install uv (https://docs.astral.sh/uv/)");
}

Expand Down Expand Up @@ -434,11 +466,26 @@ server.tool(
}
} catch {}

// Detect whether the bge-m3 model is already in the HuggingFace cache.
// First-run boot must download ~2 GB from HuggingFace under unauthenticated
// rate limits — on slow links that exceeds the spawn timeout. This flag
// disambiguates first-run (~minutes) from broken-run (~seconds to fail).
let modelCached = false;
try {
const { existsSync } = await import("node:fs");
const { homedir } = await import("node:os");
const hubRoot = process.env.HF_HUB_CACHE
? process.env.HF_HUB_CACHE
: resolve(process.env.HF_HOME || resolve(homedir(), ".cache", "huggingface"), "hub");
modelCached = existsSync(resolve(hubRoot, "models--BAAI--bge-m3"));
} catch {}

if (action === "check") {
lines.push("## Embedding Dependencies Check");
lines.push("");
lines.push(`- Python: ${checks.python ? `installed (${checks.pythonPath})` : "NOT FOUND"}`);
lines.push(`- uv: ${checks.uv ? `installed (${checks.uvPath})` : "NOT FOUND"}`);
lines.push(`- bge-m3 model cache: ${modelCached ? "present (~10s boot expected)" : "not yet downloaded (~2 GB on first boot)"}`);
lines.push(`- Sidecar: ${sidecarStatus}`);
lines.push("");

Expand Down Expand Up @@ -484,7 +531,13 @@ server.tool(
lines.push("Sidecar started successfully! Semantic search is now active.");
lines.push("Backfilling embeddings for existing notes in the background.");
} else {
lines.push("Sidecar failed to start. Check the logs for details.");
lines.push(sidecarLogPath
? `Sidecar failed to start. Check ${sidecarLogPath} for diagnostics.`
: "Sidecar failed to start.");
if (!modelCached) {
lines.push("First-run downloads (~2 GB bge-m3 model + onnxruntime wheels) can exceed the default 180s timeout on slow connections.");
lines.push("Override with `ORCH_SIDECAR_BOOT_TIMEOUT_MS=600000` (ms) and call `install_embeddings(install)` again. Subsequent boots are ~10s once cached.");
}
}
return { content: [{ type: "text" as const, text: lines.join("\n") }] };
}
Expand Down Expand Up @@ -513,7 +566,13 @@ server.tool(
lines.push("Sidecar started! Semantic search is now active.");
lines.push("First run will download the bge-m3 model (~1.5GB). This happens once and is cached.");
} else {
lines.push("uv installed but sidecar didn't start. Try restarting the session.");
lines.push(sidecarLogPath
? `uv installed but sidecar didn't start. Check ${sidecarLogPath} for diagnostics.`
: "uv installed but sidecar didn't start. Try restarting the session.");
if (!modelCached) {
lines.push("First-run downloads (~2 GB bge-m3 model + onnxruntime wheels) can exceed the default 180s timeout on slow connections.");
lines.push("Override with `ORCH_SIDECAR_BOOT_TIMEOUT_MS=600000` (ms) and call `install_embeddings(install)` again.");
}
}
} else {
const stderr = await new Response(proc.stderr).text();
Expand Down