Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,13 @@ permissions:

jobs:
# ── Job 1: Agent 크로스컴파일 ────────────────────────────────────────────
# ubuntu-latest를 쓰는 이유: CGO 미사용 Go 바이너리 + Node runtime download +
# LSP bundle은 OS 독립적이라 어느 runner에서도 OK. macOS runner 분당 과금이
# ubuntu 대비 10x이므로 ubuntu에서 먼저 처리 후 artifact로 공유한다.
# ubuntu-latest를 쓰는 이유: 에이전트는 CGO_ENABLED=0(정적 링크)로 빌드돼
# 빌드 호스트의 glibc/OS 와 무관 + Node runtime download + LSP bundle 도 OS
# 독립적이라 어느 runner 에서도 OK. (과거 linux/amd64 는 네이티브 빌드라 CGO 가
# 켜져 빌드 호스트 glibc 에 동적 링크됐고, 구버전 배포판에서 'GLIBC_x.xx not
# found' 로 실패했다 — build-agent.ts 에서 CGO_ENABLED=0 을 강제해 해결.)
# macOS runner 분당 과금이 ubuntu 대비 10x 이므로 ubuntu 에서 먼저 처리 후
# artifact 로 공유한다.
build-agent:
name: Build agent (cross-compile)
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "nexus-code",
"productName": "NexusCode",
"version": "0.5.0",
"version": "0.5.1",
"description": "Multi-workspace VSCode-style editor for macOS. Monaco editor + terminal in one window.",
"license": "MIT",
"private": true,
Expand Down
19 changes: 17 additions & 2 deletions scripts/build-agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,25 @@ async function buildAgentBinaries(args: {
const name = `agent-${args.version}-${target.os}-${target.arch}`;
await args.run(
"go",
["build", "-ldflags=-s -w", "-o", path.join(args.outDir, name), "./cmd/agent"],
[
"build",
"-tags=netgo,osusergo",
"-ldflags=-s -w",
"-o",
path.join(args.outDir, name),
"./cmd/agent",
],
{
cwd: args.rootDir,
env: { ...process.env, GOOS: target.os, GOARCH: target.arch },
// CGO_ENABLED=0 forces a fully static binary. Without it the linux/amd64
// target is a *native* build on the CI runner, where Go defaults
// CGO_ENABLED to 1; because the agent imports "net" (and os/user), the
// binary then dynamically links the runner's glibc and requires that
// exact version at runtime — failing on older distros such as Ubuntu
// 20.04 (glibc 2.31) with "version `GLIBC_x.xx' not found". With cgo
// disabled, net and os/user already use their pure-Go implementations; the
// netgo/osusergo tags make that choice explicit, so name and user lookup
// stay pure Go even if cgo is ever re-enabled for a target.
env: { ...process.env, GOOS: target.os, GOARCH: target.arch, CGO_ENABLED: "0" },
},
);
artifacts.push({ ...target, path: name });
Expand Down
21 changes: 17 additions & 4 deletions src/main/infra/agent/channel/reconnecting-process-channel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,22 @@ export interface AgentReconnectOptions {
readonly maxDelayMs?: number;
}

/**
 * Diagnostic context captured at the moment a child process closes. Passed to
 * `closeError` so the resulting terminal error can record the exit code,
 * signal, and the tail of the process's stderr (e.g. a loader error) — making
 * the file log self-sufficient instead of an empty-cause `ssh.unknown`.
 *
 * Every field is optional: `closeError` implementations must tolerate a
 * context in which nothing useful was captured.
 */
export interface ChannelCloseContext {
/** Exit code reported with the close; `null` or absent when none was reported. */
readonly code?: number | null;
/** Signal reported with the close; `null` or absent when none was reported. */
readonly signal?: NodeJS.Signals | null;
/** Tail of the process's stderr, as returned by the pipe's `notifyClose()`. */
readonly stderrTail?: string;
}

export interface ReconnectingProcessChannelOptions {
readonly spawn: () => ChildProcessWithoutNullStreams;
readonly classifyStderr: StderrClassifier;
readonly closeError: (wasReady: boolean) => Error;
readonly closeError: (wasReady: boolean, context?: ChannelCloseContext) => Error;
readonly requestTimeoutMs?: number;
readonly expectedProtocolMajor?: string;
readonly reconnect?: AgentReconnectOptions;
Expand Down Expand Up @@ -218,12 +230,13 @@ export function createReconnectingProcessChannel(
if (active !== attempt) return;
attempt.closed = true;
clearForceKillTimer(attempt);
const { wasReady } = attempt.pipe.notifyClose();
const { wasReady, stderrTail } = attempt.pipe.notifyClose();
const closeContext: ChannelCloseContext = { code, signal, stderrTail };

if (state === "disposed" || terminalError) return;
if (code === 0 && wasReady) {
state = "terminal";
terminalError = options.closeError(true);
terminalError = options.closeError(true, closeContext);
emitLifecycle({ type: "exit", code, signal });
return;
}
Expand All @@ -244,7 +257,7 @@ export function createReconnectingProcessChannel(
return;
}

const error = options.closeError(wasReady);
const error = options.closeError(wasReady, closeContext);
attempt.pipe.fail(error);
state = "terminal";
terminalError = error;
Expand Down
40 changes: 34 additions & 6 deletions src/main/infra/agent/pipe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,13 @@ export interface NdjsonPipe {
dispose(): void;
/** Marks the pipe terminally failed from the orchestrator's side. */
fail(error: Error): void;
/** Flushes buffered lines and reports whether ready had settled before close. */
notifyClose(): { wasReady: boolean };
/**
* Flushes buffered lines and reports whether ready had settled before close.
* `stderrTail` carries the last few raw stderr lines (loader errors, panics,
* unclassified warnings) so the orchestrator can attach them to the terminal
* error's cause — the file log is then self-sufficient for diagnosis.
*/
notifyClose(): { wasReady: boolean; stderrTail: string };
}

/**
Expand All @@ -207,6 +212,13 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe {
});
const stderrLines = createLineSplitter(handleStderrLine);

// Ring buffer of the most recent raw stderr lines. Retained regardless of
// classification so an unrecognized fatal line (e.g. a glibc loader error)
// is still available as diagnostic context when the process closes.
const STDERR_TAIL_MAX_LINES = 20;
const STDERR_TAIL_MAX_LINE_CHARS = 200;
const recentStderr: string[] = [];

let nextRequestId = 1;
let disposed = false;
let terminalError: Error | null = null;
Expand Down Expand Up @@ -388,9 +400,23 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe {
}
}

// Retain the raw line before classification so it survives even when no
// pattern matches — the close handler reads this tail for the cause.
recordStderr(line);

const code = deps.classifyStderr(line);
if (code) {
selfFail(createSshError(code));
// Attach the offending line as cause so the file log shows *why* the
// agent failed to start, not just the classified code.
selfFail(createSshError(code, line));
}
}

/**
 * Pushes one stderr line onto the diagnostic tail buffer. The line is clipped
 * to STDERR_TAIL_MAX_LINE_CHARS, and the oldest entry is evicted once the
 * buffer holds more than STDERR_TAIL_MAX_LINES lines.
 */
function recordStderr(line: string): void {
  const clipped = line.slice(0, STDERR_TAIL_MAX_LINE_CHARS);
  // Array.prototype.push returns the new length, so the bound check can reuse it.
  const overflowing = recentStderr.push(clipped) > STDERR_TAIL_MAX_LINES;
  if (overflowing) {
    recentStderr.shift();
  }
}

Expand Down Expand Up @@ -529,10 +555,10 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe {
rejectReady(error);
rejectPendingRequests(error);
},
notifyClose(): { wasReady: boolean } {
notifyClose(): { wasReady: boolean; stderrTail: string } {
stdoutLines.flush();
stderrLines.flush();
return { wasReady: readySettled };
return { wasReady: readySettled, stderrTail: recentStderr.join(" | ") };
},
};
}
Expand All @@ -553,7 +579,9 @@ export function createSshError(code: SshErrorCode, cause?: unknown): SshError {
try {
const causeMsg =
cause instanceof Error ? cause.message : cause === undefined ? "" : String(cause);
const causeSnippet = causeMsg.slice(0, 300);
// 600 chars (up from 300) so a close diagnostic carrying an exit code plus
// a multi-line stderr tail is not truncated before the actual error text.
const causeSnippet = causeMsg.slice(0, 600);
const stack = (error.stack ?? "").split("\n").slice(1, 6).join(" | ");
getMalformedStdoutLogger().warn(
`SshError throw: code=${code} cause=${causeSnippet} stack=${stack}`,
Expand Down
27 changes: 26 additions & 1 deletion src/main/infra/agent/ssh/channel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import { ChannelEventRegistry } from "../channel/event-registry";
import { createDisposedError, createSshError } from "../pipe";
import {
type AgentReconnectOptions,
type ChannelCloseContext,
createReconnectingProcessChannel,
} from "../channel/reconnecting-process-channel";
import { classifyAuthLine } from "./auth";
Expand Down Expand Up @@ -40,6 +41,29 @@ export interface SshChannelDependencies {
readonly reconnect?: AgentReconnectOptions;
}

/**
 * Renders the close context into a single-line cause string for the terminal
 * `ssh.unknown` error, e.g. `agent process closed (exit code=127 stderr=...)`.
 * `createSshError` logs this to the file transport, so an otherwise-opaque
 * transport close records the exit code, signal, and the tail of the agent's
 * stderr.
 *
 * The stderr portion is capped to its *trailing* characters. The captured tail
 * can be much longer than the cause snippet `createSshError` keeps (it cuts
 * the cause from the front), and the lines closest to the exit are the ones
 * that explain the failure — so the oldest text is dropped first and the cut
 * is marked with a leading `...`.
 *
 * @param context Exit code, signal and stderr tail captured at close time.
 * @returns The diagnostic string, or `undefined` when nothing useful was
 *   captured (leaving the error's cause empty).
 */
function formatCloseDiagnostic(context?: ChannelCloseContext): string | undefined {
  if (!context) return undefined;

  // Budget for the stderr text. Leaves headroom for the
  // "agent process closed (exit code=… signal=… stderr=" prefix inside the
  // 600-character cause snippet that createSshError writes to the log.
  const maxStderrChars = 500;

  const parts: string[] = [];
  if (context.code !== undefined && context.code !== null) {
    parts.push(`exit code=${context.code}`);
  }
  if (context.signal) {
    parts.push(`signal=${context.signal}`);
  }
  const stderrTail = context.stderrTail?.trim();
  if (stderrTail) {
    // Keep the end of the tail: the newest lines carry the fatal message.
    const kept =
      stderrTail.length > maxStderrChars
        ? `...${stderrTail.slice(stderrTail.length - maxStderrChars)}`
        : stderrTail;
    parts.push(`stderr=${kept}`);
  }
  return parts.length > 0 ? `agent process closed (${parts.join(" ")})` : undefined;
}

/**
* Opens an SSH-backed NDJSON request channel to the remote agent. The
* orchestrator spawns the SSH client (via ssh-master) and composes an NDJSON
Expand All @@ -60,7 +84,8 @@ export function createSshChannel(
spawn: dependencies.spawn,
}),
classifyStderr: classifyAuthLine,
closeError: () => createSshError("ssh.unknown"),
closeError: (_wasReady, context) =>
createSshError("ssh.unknown", formatCloseDiagnostic(context)),
requestTimeoutMs: dependencies.requestTimeoutMs,
expectedProtocolMajor: REMOTE_AGENT_PROTOCOL_MAJOR,
reconnect: dependencies.reconnect,
Expand Down
7 changes: 7 additions & 0 deletions src/main/infra/agent/ssh/stderr-patterns.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ const SERVER_SPAWN_FAILED_PATTERNS = [
/\bexec: .*: not found\b/i,
/\bexec: .*: no such file or directory\b/i,
/cannot execute: required file not found/i,
// Dynamic-loader failures when the agent binary cannot run on the remote:
// a glibc too old for the binary's required symbol version (the exact case
// that surfaced as an empty-cause ssh.unknown before this classifier
// existed), a missing shared library, or an architecture mismatch.
/version `?GLIBC_[\d.]+'? not found/i,
/error while loading shared libraries/i,
/cannot execute binary file/i,
];

/**
Expand Down
39 changes: 39 additions & 0 deletions tests/unit/main/agent/stderr-patterns.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import { describe, expect, test } from "bun:test";
import { classifyStderrLine } from "../../../../src/main/infra/agent/ssh/stderr-patterns";

// Regression suite for the dynamic-loader stderr patterns. They were added
// after a glibc-too-old failure surfaced as an empty-cause `ssh.unknown` (the
// agent binary was dynamically linked against a newer glibc than the remote
// had). Classifying such lines as `server.spawn-failed` gives the user "Remote
// agent failed to start" instead of the generic transport error, and the
// offending line is attached as the SshError cause in the file log.
describe("classifyStderrLine — agent loader failures", () => {
  // Each entry: [test title, stderr line that must classify as a spawn failure].
  const spawnFailureLines: ReadonlyArray<readonly [string, string]> = [
    [
      "glibc version mismatch",
      "agent: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by agent)",
    ],
    [
      "missing shared library",
      "agent: error while loading shared libraries: libfoo.so.1: cannot open",
    ],
    [
      "architecture mismatch",
      "bash: /x/agent: cannot execute binary file: Exec format error",
    ],
  ];

  for (const [title, stderrLine] of spawnFailureLines) {
    test(title, () => {
      const classified = classifyStderrLine(stderrLine);
      expect(classified).toBe("server.spawn-failed");
    });
  }

  test("benign login banner is not misclassified", () => {
    const classified = classifyStderrLine("Welcome to monolith! Have a great day.");
    expect(classified).toBeNull();
  });

  test("existing auth/connect classification is unchanged", () => {
    const authResult = classifyStderrLine("Permission denied (publickey).");
    expect(authResult).toBe("ssh.auth-failed");

    const connectResult = classifyStderrLine(
      "ssh: connect to host x port 22: Connection refused",
    );
    expect(connectResult).toBe("ssh.connect-failed");
  });
});