diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index d1fbaf0b..2d2e61df 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -27,9 +27,13 @@ permissions:
 
 jobs:
   # ── Job 1: Agent 크로스컴파일 ────────────────────────────────────────────
-  # ubuntu-latest를 쓰는 이유: CGO 미사용 Go 바이너리 + Node runtime download +
-  # LSP bundle은 OS 독립적이라 어느 runner에서도 OK. macOS runner 분당 과금이
-  # ubuntu 대비 10x이므로 ubuntu에서 먼저 처리 후 artifact로 공유한다.
+  # Why ubuntu-latest: the agent is built with CGO_ENABLED=0 (statically
+  # linked), so it does not depend on the build host's glibc/OS; the Node
+  # runtime download and LSP bundle are OS-independent too, so any runner works.
+  # (linux/amd64 used to be a native build with CGO on, linked dynamically to the
+  # host glibc, and failed on older distros with 'GLIBC_x.xx not found'; forcing
+  # CGO_ENABLED=0 in build-agent.ts fixed that.) macOS runners bill about 10x
+  # ubuntu per minute, so build on ubuntu first and share the result as artifacts.
   build-agent:
     name: Build agent (cross-compile)
     runs-on: ubuntu-latest
diff --git a/package.json b/package.json
index 6409d1ae..320f2702 100644
--- a/package.json
+++ b/package.json
@@ -1,7 +1,7 @@
 {
   "name": "nexus-code",
   "productName": "NexusCode",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "description": "Multi-workspace VSCode-style editor for macOS. Monaco editor + terminal in one window.",
   "license": "MIT",
   "private": true,
diff --git a/scripts/build-agent.ts b/scripts/build-agent.ts
index 804c2fb5..7edb9536 100644
--- a/scripts/build-agent.ts
+++ b/scripts/build-agent.ts
@@ -214,10 +214,25 @@ async function buildAgentBinaries(args: {
     const name = `agent-${args.version}-${target.os}-${target.arch}`;
     await args.run(
       "go",
-      ["build", "-ldflags=-s -w", "-o", path.join(args.outDir, name), "./cmd/agent"],
+      [
+        "build",
+        "-tags=netgo,osusergo",
+        "-ldflags=-s -w",
+        "-o",
+        path.join(args.outDir, name),
+        "./cmd/agent",
+      ],
       {
         cwd: args.rootDir,
-        env: { ...process.env, GOOS: target.os, GOARCH: target.arch },
+        // CGO_ENABLED=0 forces a fully static binary. Without it the linux/amd64
+        // target is a *native* build on the CI runner, where Go defaults
+        // CGO_ENABLED to 1; because the agent imports "net" (and os/user), the
+        // binary then dynamically links the runner's glibc and needs at least
+        // that glibc version at runtime — failing on older distros such as
+        // Ubuntu 20.04 (glibc 2.31) with "version `GLIBC_x.xx' not found". The
+        // netgo/osusergo tags pin the pure-Go net and os/user implementations
+        // explicitly, so they stay selected even if cgo is re-enabled later.
+        env: { ...process.env, GOOS: target.os, GOARCH: target.arch, CGO_ENABLED: "0" },
       },
     );
     artifacts.push({ ...target, path: name });
diff --git a/src/main/infra/agent/channel/reconnecting-process-channel.ts b/src/main/infra/agent/channel/reconnecting-process-channel.ts
index d58b2550..139129a0 100644
--- a/src/main/infra/agent/channel/reconnecting-process-channel.ts
+++ b/src/main/infra/agent/channel/reconnecting-process-channel.ts
@@ -22,10 +22,22 @@ export interface AgentReconnectOptions {
   readonly maxDelayMs?: number;
 }
 
+/**
+ * Diagnostic context captured at the moment a child process closes. Passed to
+ * `closeError` so the resulting terminal error can record the exit code,
+ * signal, and the tail of the process's stderr (e.g. a loader error) — making
+ * the file log self-sufficient instead of an empty-cause `ssh.unknown`.
+ */
+export interface ChannelCloseContext {
+  readonly code?: number | null;
+  readonly signal?: NodeJS.Signals | null;
+  readonly stderrTail?: string;
+}
+
 export interface ReconnectingProcessChannelOptions {
   readonly spawn: () => ChildProcessWithoutNullStreams;
   readonly classifyStderr: StderrClassifier;
-  readonly closeError: (wasReady: boolean) => Error;
+  readonly closeError: (wasReady: boolean, context?: ChannelCloseContext) => Error;
   readonly requestTimeoutMs?: number;
   readonly expectedProtocolMajor?: string;
   readonly reconnect?: AgentReconnectOptions;
@@ -218,12 +230,13 @@ export function createReconnectingProcessChannel(
       if (active !== attempt) return;
       attempt.closed = true;
       clearForceKillTimer(attempt);
-      const { wasReady } = attempt.pipe.notifyClose();
+      const { wasReady, stderrTail } = attempt.pipe.notifyClose();
+      const closeContext: ChannelCloseContext = { code, signal, stderrTail };
       if (state === "disposed" || terminalError) return;
 
       if (code === 0 && wasReady) {
         state = "terminal";
-        terminalError = options.closeError(true);
+        terminalError = options.closeError(true, closeContext);
         emitLifecycle({ type: "exit", code, signal });
         return;
       }
@@ -244,7 +257,7 @@ export function createReconnectingProcessChannel(
         return;
       }
 
-      const error = options.closeError(wasReady);
+      const error = options.closeError(wasReady, closeContext);
       attempt.pipe.fail(error);
       state = "terminal";
       terminalError = error;
diff --git a/src/main/infra/agent/pipe.ts b/src/main/infra/agent/pipe.ts
index d25d2368..8695b297 100644
--- a/src/main/infra/agent/pipe.ts
+++ b/src/main/infra/agent/pipe.ts
@@ -184,8 +184,13 @@ export interface NdjsonPipe {
   dispose(): void;
   /** Marks the pipe terminally failed from the orchestrator's side. */
   fail(error: Error): void;
-  /** Flushes buffered lines and reports whether ready had settled before close. */
-  notifyClose(): { wasReady: boolean };
+  /**
+   * Flushes buffered lines and reports whether ready had settled before close.
+   * `stderrTail` carries the last few raw stderr lines (loader errors, panics,
+   * unclassified warnings) so the orchestrator can attach them to the terminal
+   * error's cause — the file log is then self-sufficient for diagnosis.
+   */
+  notifyClose(): { wasReady: boolean; stderrTail: string };
 }
 
 /**
@@ -207,6 +212,13 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe {
   });
   const stderrLines = createLineSplitter(handleStderrLine);
 
+  // Ring buffer of the most recent raw stderr lines. Retained regardless of
+  // classification so an unrecognized fatal line (e.g. a glibc loader error)
+  // is still available as diagnostic context when the process closes.
+  const STDERR_TAIL_MAX_LINES = 20;
+  const STDERR_TAIL_MAX_LINE_CHARS = 200;
+  const recentStderr: string[] = [];
+
   let nextRequestId = 1;
   let disposed = false;
   let terminalError: Error | null = null;
@@ -388,9 +400,23 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe {
       }
     }
 
+    // Retain the raw line before classification so it survives even when no
+    // pattern matches — the close handler reads this tail for the cause.
+    recordStderr(line);
+
     const code = deps.classifyStderr(line);
     if (code) {
-      selfFail(createSshError(code));
+      // Attach the offending line as cause so the file log shows *why* the
+      // agent failed to start, not just the classified code.
+      selfFail(createSshError(code, line));
+    }
+  }
+
+  /** Appends one stderr line to the bounded diagnostic tail buffer. */
+  function recordStderr(line: string): void {
+    recentStderr.push(line.slice(0, STDERR_TAIL_MAX_LINE_CHARS));
+    if (recentStderr.length > STDERR_TAIL_MAX_LINES) {
+      recentStderr.shift();
     }
   }
 
@@ -529,10 +555,10 @@ export function createNdjsonPipe(deps: NdjsonPipeDependencies): NdjsonPipe {
       rejectReady(error);
       rejectPendingRequests(error);
     },
-    notifyClose(): { wasReady: boolean } {
+    notifyClose(): { wasReady: boolean; stderrTail: string } {
       stdoutLines.flush();
       stderrLines.flush();
-      return { wasReady: readySettled };
+      return { wasReady: readySettled, stderrTail: recentStderr.join(" | ") };
     },
   };
 }
@@ -553,7 +579,9 @@ export function createSshError(code: SshErrorCode, cause?: unknown): SshError {
   try {
     const causeMsg =
       cause instanceof Error ? cause.message : cause === undefined ? "" : String(cause);
-    const causeSnippet = causeMsg.slice(0, 300);
+    // 600 chars (up from 300) so a close diagnostic carrying an exit code plus
+    // a multi-line stderr tail is not truncated before the actual error text.
+    const causeSnippet = causeMsg.slice(0, 600);
     const stack = (error.stack ?? "").split("\n").slice(1, 6).join(" | ");
     getMalformedStdoutLogger().warn(
       `SshError throw: code=${code} cause=${causeSnippet} stack=${stack}`,
diff --git a/src/main/infra/agent/ssh/channel.ts b/src/main/infra/agent/ssh/channel.ts
index 05936474..81f5f508 100644
--- a/src/main/infra/agent/ssh/channel.ts
+++ b/src/main/infra/agent/ssh/channel.ts
@@ -8,6 +8,7 @@ import { ChannelEventRegistry } from "../channel/event-registry";
 import { createDisposedError, createSshError } from "../pipe";
 import {
   type AgentReconnectOptions,
+  type ChannelCloseContext,
   createReconnectingProcessChannel,
 } from "../channel/reconnecting-process-channel";
 import { classifyAuthLine } from "./auth";
@@ -40,6 +41,42 @@ export interface SshChannelDependencies {
   readonly reconnect?: AgentReconnectOptions;
 }
 
+/**
+ * Upper bound on the stderr excerpt embedded in a close diagnostic. The pipe
+ * can retain more stderr text than `createSshError` writes to the log, and
+ * the newest lines are the ones nearest the exit, so the excerpt is taken
+ * from the end of the tail.
+ */
+const CLOSE_DIAGNOSTIC_STDERR_MAX_CHARS = 480;
+
+/**
+ * Renders the close context into a single-line cause string for the terminal
+ * `ssh.unknown` error. `createSshError` logs this to the file transport, so an
+ * otherwise-opaque transport close ("disconnected by user", empty cause) now
+ * records the exit code, signal, and the tail of the agent's stderr.
+ *
+ * @param context Exit details captured when the child process closed.
+ * @returns The cause string, or undefined when nothing useful was captured,
+ *   preserving the prior empty-cause behavior.
+ */
+function formatCloseDiagnostic(context?: ChannelCloseContext): string | undefined {
+  if (!context) return undefined;
+  const parts: string[] = [];
+  if (context.code !== undefined && context.code !== null) {
+    parts.push(`exit code=${context.code}`);
+  }
+  if (context.signal) {
+    parts.push(`signal=${context.signal}`);
+  }
+  const stderrTail = context.stderrTail?.trim();
+  if (stderrTail) {
+    // Cut from the front so the lines written last, just before the exit,
+    // are the ones that survive.
+    parts.push(`stderr=${stderrTail.slice(-CLOSE_DIAGNOSTIC_STDERR_MAX_CHARS)}`);
+  }
+  return parts.length > 0 ? `agent process closed (${parts.join(" ")})` : undefined;
+}
+
 /**
  * Opens an SSH-backed NDJSON request channel to the remote agent. The
  * orchestrator spawns the SSH client (via ssh-master) and composes an NDJSON
@@ -60,7 +97,8 @@ export function createSshChannel(
       spawn: dependencies.spawn,
     }),
     classifyStderr: classifyAuthLine,
-    closeError: () => createSshError("ssh.unknown"),
+    closeError: (_wasReady, context) =>
+      createSshError("ssh.unknown", formatCloseDiagnostic(context)),
     requestTimeoutMs: dependencies.requestTimeoutMs,
     expectedProtocolMajor: REMOTE_AGENT_PROTOCOL_MAJOR,
     reconnect: dependencies.reconnect,
diff --git a/src/main/infra/agent/ssh/stderr-patterns.ts b/src/main/infra/agent/ssh/stderr-patterns.ts
index 3336130a..4dde2ba2 100644
--- a/src/main/infra/agent/ssh/stderr-patterns.ts
+++ b/src/main/infra/agent/ssh/stderr-patterns.ts
@@ -31,6 +31,13 @@ const SERVER_SPAWN_FAILED_PATTERNS = [
   /\bexec: .*: not found\b/i,
   /\bexec: .*: no such file or directory\b/i,
   /cannot execute: required file not found/i,
+  // Dynamic-loader failures when the agent binary cannot run on the remote:
+  // a glibc too old for the binary's required symbol version (the exact case
+  // that surfaced as an empty-cause ssh.unknown before this classifier
+  // existed), a missing shared library, or an architecture mismatch.
+  /version `?GLIBC_[\d.]+'? not found/i,
+  /error while loading shared libraries/i,
+  /cannot execute binary file/i,
 ];
 
 /**
diff --git a/tests/unit/main/agent/stderr-patterns.test.ts b/tests/unit/main/agent/stderr-patterns.test.ts
new file mode 100644
index 00000000..aed5f9be
--- /dev/null
+++ b/tests/unit/main/agent/stderr-patterns.test.ts
@@ -0,0 +1,39 @@
+import { describe, expect, test } from "bun:test";
+import { classifyStderrLine } from "../../../../src/main/infra/agent/ssh/stderr-patterns";
+
+// Regression coverage for the dynamic-loader patterns. These were added after a
+// glibc-too-old failure surfaced as an empty-cause `ssh.unknown` (the agent
+// binary was dynamically linked against a newer glibc than the remote had).
+// Classifying them as `server.spawn-failed` gives the user "Remote agent failed
+// to start" instead of the generic transport error, and the offending line is
+// attached as the SshError cause in the file log.
+describe("classifyStderrLine — agent loader failures", () => {
+  test("glibc version mismatch", () => {
+    const line =
+      "agent: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found (required by agent)";
+    expect(classifyStderrLine(line)).toBe("server.spawn-failed");
+  });
+
+  test("missing shared library", () => {
+    expect(
+      classifyStderrLine("agent: error while loading shared libraries: libfoo.so.1: cannot open"),
+    ).toBe("server.spawn-failed");
+  });
+
+  test("architecture mismatch", () => {
+    expect(classifyStderrLine("bash: /x/agent: cannot execute binary file: Exec format error")).toBe(
+      "server.spawn-failed",
+    );
+  });
+
+  test("benign login banner is not misclassified", () => {
+    expect(classifyStderrLine("Welcome to monolith! Have a great day.")).toBeNull();
+  });
+
+  test("existing auth/connect classification is unchanged", () => {
+    expect(classifyStderrLine("Permission denied (publickey).")).toBe("ssh.auth-failed");
+    expect(classifyStderrLine("ssh: connect to host x port 22: Connection refused")).toBe(
+      "ssh.connect-failed",
+    );
+  });
+});