diff --git a/AGENTS.md b/AGENTS.md
index 883d0152..50aff2ee 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -214,6 +214,7 @@ Namespaces (`src/cli/log.ts`):
 |---|---|---|
 | `mcp` | McpSource construction; per-call dispatch showing `taskSupport` / `path=task-augmented\|inline` / cached tool count | "Why is my tool going inline vs task-augmented?" "Is my tool cache populated?" |
 | `sse` | Every `tool.progress` / `tool.done` entering the runtime sink wrap; every `data.changed` broadcast with client count | "Are progress events reaching the SSE layer?" "Are broadcasts happening, to how many clients?" |
+| `registry` | Per-source skips inside `ToolRegistry.availableTools` (one line per stuck source per enumeration; operator-facing "first failure" signal lives at the lifecycle transition site as a warn — see issue #194) | "Which sources are being skipped on each chat turn?" "Is enumeration silently dropping a connector?" |
 
 Add a namespace by calling `log.debug("ns", "message")` (from `src/cli/log.ts`). Keep this table and the `log.ts` doc comment in sync.
 
diff --git a/src/api/server.ts b/src/api/server.ts
index 8a5ce362..5ea3cd5f 100644
--- a/src/api/server.ts
+++ b/src/api/server.ts
@@ -62,7 +62,36 @@ export function startServer(options: ServerOptions): ServerHandle {
   const internalToken = runtime.getInternalToken();
 
   const mcpSources = runtime.mcpSources();
-  const healthMonitor = new HealthMonitor(mcpSources, runtime.getEventSink());
+  // Wire HealthMonitor into the bundle lifecycle so a detected stdio
+  // crash transitions BundleInstance.state through the lifecycle's
+  // recordCrash/Recovery/Dead funnel. This is what gives the operator
+  // a single warn-on-first-failure log instead of one warn per chat
+  // turn forever (issue #194), and keeps the Configure UI's state pill
+  // accurate when a subprocess dies mid-session.
+  const lifecycle = runtime.getLifecycle();
+  const healthMonitor = new HealthMonitor(mcpSources, runtime.getEventSink(), {
+    reportSourceTransition: (source, to) => {
+      // Resolve source → BundleInstance by (serverName, wsId). McpSource
+      // instances stamped with workspaceId via bundleContext at
+      // construction time — see `McpSource.getWorkspaceId`. The wsId
+      // disambiguates the same connector installed in multiple
+      // workspaces; without it a crash in workspace A could land on
+      // workspace B's BundleInstance (whichever the iteration hit first),
+      // leaving the actual crashed bundle stuck at `running` in its
+      // workspace's Configure UI. In-process platform sources and
+      // sources missing a bundleContext (rare) return undefined and we
+      // silently skip — they don't back a BundleInstance.
+      const wsId = source.getWorkspaceId();
+      if (!wsId) return;
+      const inst = runtime
+        .getBundleInstances()
+        .find((i) => i.serverName === source.name && i.wsId === wsId);
+      if (!inst) return;
+      if (to === "crashed") lifecycle.recordCrash(inst.serverName, inst.wsId);
+      else if (to === "running") lifecycle.recordRecovery(inst.serverName, inst.wsId);
+      else lifecycle.recordDead(inst.serverName, inst.wsId);
+    },
+  });
   healthMonitor.start();
 
   // SSE event manager — listens to runtime events and broadcasts to clients
diff --git a/src/bundles/connection.ts b/src/bundles/connection.ts
index c93d379e..d13360a7 100644
--- a/src/bundles/connection.ts
+++ b/src/bundles/connection.ts
@@ -105,3 +105,30 @@ export function summarizeConnectionState(connections: Map<string, Connection>):
   if (states.includes("crashed")) return "crashed";
   return states[0]!;
 }
+
+/**
+ * States that warrant operator attention when entered. Transitioning INTO
+ * one of these is the "first failure" signal worth a warn log; staying in
+ * one across many tool-list enumerations is not (that's the spam this set
+ * eliminates — see issue #194).
+ *
+ *   - `dead`            — process / transport gave up
+ *   - `crashed`         — transport closed unexpectedly mid-session
+ *   - `reauth_required` — refresh token rejected; user must reconnect
+ *
+ * Excluded deliberately:
+ *   - `not_authenticated` — silent resting state (fresh install, post-Disconnect)
+ *   - `pending_auth`      — in-flight OAuth, expected during normal Connect
+ *   - `starting`          — transient boot state
+ *   - `stopped`           — operator intent (stopBundle / uninstall)
+ *   - `running`           — healthy
+ */
+const BROKEN_STATES: ReadonlySet<BundleState> = new Set<BundleState>([
+  "dead",
+  "crashed",
+  "reauth_required",
+]);
+
+export function isBrokenState(s: BundleState): boolean {
+  return BROKEN_STATES.has(s);
+}
diff --git a/src/bundles/lifecycle.ts b/src/bundles/lifecycle.ts
index 984a1fd8..9d97786e 100644
--- a/src/bundles/lifecycle.ts
+++ b/src/bundles/lifecycle.ts
@@ -17,6 +17,7 @@ import { connectorSlug, hasPersistedComposioConnection } from "./composio-connec
 import {
   type Connection,
   type ConnectionState,
+  isBrokenState,
   summarizeConnectionState,
   WORKSPACE_PRINCIPAL_ID,
 } from "./connection.ts";
@@ -878,9 +879,37 @@ export class BundleLifecycleManager {
   /**
    * Update state on a BundleInstance. Public so HealthMonitor can
    * report crashed/recovered/dead transitions.
+   *
+   * Edge-triggered logging (issue #194):
+   *   - non-broken → broken: one `log.warn` at the moment of transition.
+   *     Subsequent enumerations of the now-stuck source produce zero
+   *     operator-visible noise.
+   *   - broken → running: one `log.info` ("recovered") at the moment of
+   *     recovery, symmetric with the warn.
+   *   - No-op (same state in, same state out): silent.
+   *
+   * "Broken" set lives in `connection.ts::isBrokenState` =
+   * { dead, crashed, reauth_required }. Excludes `pending_auth` (in-flight
+   * OAuth) and `not_authenticated` (resting state).
    */
   transition(instance: BundleInstance, newState: BundleState): void {
+    const oldState = instance.state;
     instance.state = newState;
+    if (oldState === newState) return;
+
+    updateLastBrokenState(instance, oldState, newState);
+
+    if (isBrokenState(newState) && !isBrokenState(oldState)) {
+      log.warn(
+        `[bundles] ${instance.serverName} (ws ${instance.wsId}) entered '${newState}' from '${oldState}'. ` +
+          `Tools unavailable until recovered. ${transitionRecoveryHint(newState)}`,
+      );
+    } else if (newState === "running" && instance.lastBrokenState !== undefined) {
+      log.info(
+        `[bundles] ${instance.serverName} (ws ${instance.wsId}) recovered: '${instance.lastBrokenState}' → 'running'`,
+      );
+      instance.lastBrokenState = undefined;
+    }
   }
 
   /**
@@ -972,7 +1001,9 @@ export class BundleLifecycleManager {
 
     // Recompute summary state so legacy consumers (HealthMonitor,
     // briefing-collector, runtime status API) see the right surface.
-    instance.state = summarizeConnectionState(instance.connections);
+    // Routed through transition() so edge-triggered logging fires on
+    // non-broken → broken / broken → running transitions (issue #194).
+    this.transition(instance, summarizeConnectionState(instance.connections));
 
     this.eventSink.emit({
       type: "connection.state_changed",
@@ -1741,6 +1772,56 @@ interface UpjackScheduleDeclaration {
 // Internal helpers
 // ---------------------------------------------------------------------------
 
+/**
+ * Maintain `instance.lastBrokenState` for one transition. Three rules,
+ * in priority order:
+ *
+ *   1. Entering / staying broken    → set to newState (latest wins)
+ *   2. Leaving broken with no label → capture oldState (handles
+ *      instances constructed directly into a broken state, never
+ *      having flowed through this funnel before)
+ *   3. Operator-stop                → clear (explicit end of episode;
+ *      a later boot → running must not emit a stale recovery log)
+ *
+ * The matching "leaving broken to running → clear after logging" case
+ * lives in `transition()` itself because it needs to read the value to
+ * format the recovery message before clearing.
+ *
+ * Split out from `transition()` so the funnel reads top-down as
+ * state-write → sticky maintenance → log decision, without sticky-bit
+ * branching obscuring the log policy.
+ */
+function updateLastBrokenState(
+  instance: BundleInstance,
+  oldState: BundleState,
+  newState: BundleState,
+): void {
+  if (isBrokenState(newState)) {
+    instance.lastBrokenState = newState;
+  } else if (isBrokenState(oldState) && instance.lastBrokenState === undefined) {
+    instance.lastBrokenState = oldState;
+  } else if (newState === "stopped") {
+    instance.lastBrokenState = undefined;
+  }
+}
+
+/**
+ * One-line operator hint appended to the broken-state warn. Tells the
+ * reader what recovery path applies without forcing them to look it up.
+ */
+function transitionRecoveryHint(state: BundleState): string {
+  switch (state) {
+    case "reauth_required":
+      return "User must reconnect via Connectors page.";
+    case "dead":
+      return "Use startBundle to restart.";
+    case "crashed":
+      return "HealthMonitor will attempt auto-restart with exponential backoff.";
+    default:
+      return "";
+  }
+}
+
 function createInstance(
   serverName: string,
   bundleName: string,
diff --git a/src/bundles/types.ts b/src/bundles/types.ts
index 27108143..c9cd5246 100644
--- a/src/bundles/types.ts
+++ b/src/bundles/types.ts
@@ -351,6 +351,19 @@ export interface BundleInstance {
   description?: string;
   /** Current lifecycle state. */
   state: BundleState;
+  /**
+   * Sticky breadcrumb of the most recent broken state observed since
+   * the last successful recovery to `running`. Owned by
+   * `BundleLifecycleManager.transition()` — set on any transition where
+   * the new (or old, for boot-into-broken instances) state is in the
+   * broken set, cleared on reaching `running` or an operator-driven
+   * `stopped`. Lets the recovery info log fire on multi-step paths the
+   * direct edge check misses — notably the URL bundle reconnect flow
+   * `reauth_required → pending_auth → running`, where neither
+   * transition is `broken → running`. Process-local; no persistence
+   * needed because a fresh process starts a fresh episode.
+   */
+  lastBrokenState?: BundleState;
   /** MTF trust score from mpak (0-100), or null if unavailable. */
   trustScore: number | null;
   /** UI placement metadata from _meta["ai.nimblebrain/host"]. */
diff --git a/src/cli/log.ts b/src/cli/log.ts
index b73bed3c..15b8db39 100644
--- a/src/cli/log.ts
+++ b/src/cli/log.ts
@@ -14,6 +14,8 @@
  * Known namespaces:
  *   - `mcp` — McpSource construction, dispatch decisions (task-augmented vs inline)
  *   - `sse` — Runtime event sink → SSE broadcast (tool.progress, data.changed)
+ *   - `registry` — ToolRegistry.availableTools per-source skips (one line per stuck
+ *     source per enumeration; operator-facing signal lives at lifecycle transition)
  *
  * Keep this list in sync with the CLAUDE.md "Debugging" section so it's
  * discoverable without reading source.
diff --git a/src/runtime/runtime.ts b/src/runtime/runtime.ts
index 0b449bb6..c4f318cf 100644
--- a/src/runtime/runtime.ts
+++ b/src/runtime/runtime.ts
@@ -1647,19 +1647,27 @@ export class Runtime {
     return [...names];
   }
 
-  /** Get MCP sources across all workspace registries (for health monitoring). */
+  /** Get MCP sources across all workspace registries (for health monitoring).
+   *
+   * Returns every McpSource instance — no dedupe by `source.name`.
+   * The same bundle (e.g. `linear-mcp`) installed in workspaces A and B
+   * produces two distinct McpSource processes, both of which need
+   * monitoring; collapsing them would leave one workspace's
+   * `BundleInstance.state` permanently stale on a crash.
+   *
+   * `Set<McpSource>` keys on instance identity so any McpSource
+   * registered directly in multiple workspace registries is monitored
+   * once. `SharedSourceRef` wrappers don't satisfy `instanceof McpSource`
+   * and are passed over here — their inner source is monitored via the
+   * native registry it was constructed in. */
   mcpSources(): McpSource[] {
-    const sources: McpSource[] = [];
-    const seen = new Set<string>();
+    const sources = new Set<McpSource>();
     for (const reg of this._workspaceRegistries.values()) {
       for (const s of reg.getSources()) {
-        if (s instanceof McpSource && !seen.has(s.name)) {
-          seen.add(s.name);
-          sources.push(s);
-        }
+        if (s instanceof McpSource) sources.add(s);
       }
     }
-    return sources;
+    return [...sources];
   }
 
   /** Get all tracked bundle instances (unfiltered — use getBundleInstancesForWorkspace for scoped access). */
diff --git a/src/tools/health-monitor.ts b/src/tools/health-monitor.ts
index 3d1266e9..0ed502d9 100644
--- a/src/tools/health-monitor.ts
+++ b/src/tools/health-monitor.ts
@@ -1,18 +1,18 @@
 import type { EventSink } from "../engine/types.ts";
 import type { McpSource } from "./mcp-source.ts";
 
-export type BundleState = "healthy" | "restarting" | "dead";
+export type HealthRecordState = "healthy" | "restarting" | "dead";
 
 export interface BundleHealth {
   name: string;
-  state: BundleState;
+  state: HealthRecordState;
   uptime: number | null;
   restartCount: number;
 }
 
 interface BundleRecord {
   source: McpSource;
-  state: BundleState;
+  state: HealthRecordState;
   restartCount: number;
 }
 
@@ -20,9 +20,31 @@ const MAX_RESTARTS = 5;
 const DEFAULT_BASE_DELAY_MS = 1000;
 const DEFAULT_CHECK_INTERVAL_MS = 30_000;
 
+/**
+ * Reported HealthMonitor transition for the BundleInstance underlying a
+ * source. `crashed` and `dead` map to `lifecycle.recordCrash` /
+ * `recordDead`; `running` maps to `recordRecovery`. The single shape
+ * pushes the per-method switch + source→instance lookup into the
+ * caller (`startServer`) so HealthMonitor stays unaware of lifecycle
+ * internals — useful when the caller wants to inject a different
+ * backing system (tests, alternative observability paths).
+ */
+export type HealthMonitorTransition = "crashed" | "running" | "dead";
+
 export interface HealthMonitorOptions {
   checkIntervalMs?: number;
   baseDelayMs?: number;
+  /**
+   * Propagate a detected health transition back to `BundleInstance.state`
+   * via the bundle lifecycle. Without this hook the URL bundle path
+   * (which funnels through `recordConnectionStateChange`) still works,
+   * but stdio subprocess crashes leave the user-facing state stuck at
+   * `running`. Hook is no-op safe; the caller is responsible for the
+   * source → instance resolution (returning early if the source doesn't
+   * back any BundleInstance, e.g. shared or in-process platform sources).
+   * See issue #194 for the operator log story this enables.
+   */
+  reportSourceTransition?: (source: McpSource, to: HealthMonitorTransition) => void;
 }
 
 /**
@@ -34,6 +56,7 @@ export class HealthMonitor {
   private timer: ReturnType<typeof setInterval> | null = null;
   private checkIntervalMs: number;
   private baseDelayMs: number;
+  private reportSourceTransition: HealthMonitorOptions["reportSourceTransition"];
 
   constructor(
     sources: McpSource[],
@@ -42,9 +65,10 @@ export class HealthMonitor {
   ) {
     this.checkIntervalMs = opts.checkIntervalMs ?? DEFAULT_CHECK_INTERVAL_MS;
     this.baseDelayMs = opts.baseDelayMs ?? DEFAULT_BASE_DELAY_MS;
+    this.reportSourceTransition = opts.reportSourceTransition;
     this.records = sources.map((source) => ({
       source,
-      state: "healthy" as BundleState,
+      state: "healthy" as HealthRecordState,
       restartCount: 0,
     }));
   }
@@ -98,6 +122,14 @@ export class HealthMonitor {
       },
     });
 
+    // Propagate to BundleInstance.state on first detection only. The
+    // `record.state === "healthy"` guard distinguishes the first failure
+    // from subsequent sweeps that find us in "restarting" — we don't want
+    // to report crashed on every poll cycle while a restart is pending.
+    if (record.state === "healthy") {
+      this.reportSourceTransition?.(record.source, "crashed");
+    }
+
     // Check if we've exhausted restart attempts
     if (record.restartCount >= MAX_RESTARTS) {
       record.state = "dead";
@@ -109,6 +141,7 @@ export class HealthMonitor {
           ...(remote ? { remote: true } : {}),
         },
       });
+      this.reportSourceTransition?.(record.source, "dead");
       return;
     }
 
@@ -149,6 +182,7 @@ export class HealthMonitor {
           ...(remote ? { remote: true } : {}),
         },
       });
+      this.reportSourceTransition?.(record.source, "running");
     } else {
       // Restart failed — check again on next cycle (might hit max)
       record.state = "restarting";
diff --git a/src/tools/mcp-source.ts b/src/tools/mcp-source.ts
index d3764e90..b696cd0b 100644
--- a/src/tools/mcp-source.ts
+++ b/src/tools/mcp-source.ts
@@ -321,6 +321,24 @@ export class McpSource implements ToolSource {
     return this.mode.type === "remote";
   }
 
+  /**
+   * Workspace this source was constructed for, when available. Bundle-
+   * spawned sources (URL, stdio, named, local) always carry a workspace
+   * via `bundleContext.workspaceId` — that is the field returned here.
+   * In-process platform sources and short-lived test sources don't pass
+   * a `bundleContext` and return `undefined`.
+   *
+   * Used by `HealthMonitor`'s `reportSourceTransition` hook so the
+   * source → BundleInstance lookup is keyed on `(serverName, wsId)`
+   * rather than name alone. Without the wsId disambiguation, the same
+   * bundle (e.g. `linear-mcp`) installed in two workspaces would all
+   * route their crash detection to whichever instance happened to be
+   * first in iteration order, leaving the other stuck at `running`.
+   */
+  getWorkspaceId(): string | undefined {
+    return this.bundleContext?.workspaceId;
+  }
+
   async start(): Promise<void> {
     // Fresh stderr state on every start. Restart cycles must not bleed
     // a dead instance's tail into the new instance's crash report.
diff --git a/src/tools/registry.ts b/src/tools/registry.ts
index 8b99159d..2d45fb8e 100644
--- a/src/tools/registry.ts
+++ b/src/tools/registry.ts
@@ -109,15 +109,22 @@ export class ToolRegistry implements ToolRouter {
       // workspace source. Surface the failure in the source's own
       // status (Configure page renders it from BundleInstance.state)
       // and leave the chat usable.
+      //
+      // Operator-facing signal lives at the lifecycle transition site
+      // (one warn per non-broken → broken edge — see
+      // `BundleLifecycleManager.transition` and issue #194). This skip
+      // path fires per chat turn; logging at warn level here would
+      // spam stderr for any source stuck in a broken state.
+      // Gated behind `NB_DEBUG=registry` for trace-level diagnosis.
       let tools: Tool[];
       try {
         tools = await source.tools();
       } catch (err) {
         const msg = err instanceof Error ? err.message : String(err);
-        log.warn(
-          `[registry] availableTools: skipping source "${source.name}" — ${msg}. ` +
-            `The bundle's own state surface (Connectors page) reflects this; the chat list ` +
-            `omits its tools until the source recovers.`,
+        log.debug(
+          "registry",
+          `availableTools: skipping source "${source.name}" — ${msg}. ` +
+            `Operator signal lives at lifecycle transition (warn on enter-broken).`,
         );
         continue;
       }
diff --git a/test/integration/health-monitor-lifecycle.test.ts b/test/integration/health-monitor-lifecycle.test.ts
new file mode 100644
index 00000000..429ec0ce
--- /dev/null
+++ b/test/integration/health-monitor-lifecycle.test.ts
@@ -0,0 +1,280 @@
+import { afterAll, afterEach, beforeEach, describe, expect, it } from "bun:test";
+import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { BundleLifecycleManager } from "../../src/bundles/lifecycle.ts";
+import { log } from "../../src/cli/log.ts";
+import type { EngineEvent, EventSink } from "../../src/engine/types.ts";
+import { HealthMonitor } from "../../src/tools/health-monitor.ts";
+import { McpSource } from "../../src/tools/mcp-source.ts";
+import { ToolRegistry } from "../../src/tools/registry.ts";
+
+/**
+ * End-to-end integration of HealthMonitor's crash detection with the
+ * bundle lifecycle's `transition()` funnel (issue #194).
+ *
+ * Unit tests cover `transition()` in isolation. This file exercises
+ * the wiring that lives in `src/api/server.ts`:
+ *
+ *   HealthMonitor.check() observes !isAlive()
+ *     → reportSourceTransition(source, "crashed")
+ *       → source.getWorkspaceId() lookup
+ *         → lifecycle.recordCrash(name, wsId)
+ *           → transition(running → crashed)
+ *             → log.warn fires once
+ *             → instance.state === "crashed"
+ *
+ * Repeats the resolution to verify the (name, wsId) keying — same
+ * server name in two workspaces, only the crashed one transitions.
+ */
+
+const testDir = join(tmpdir(), `nb-health-monitor-lifecycle-${Date.now()}`);
+
+function setupTestDir() {
+  if (existsSync(testDir)) rmSync(testDir, { recursive: true });
+  mkdirSync(testDir, { recursive: true });
+}
+
+afterAll(() => {
+  if (existsSync(testDir)) rmSync(testDir, { recursive: true });
+});
+
+function makeSink(): EventSink & { events: EngineEvent[] } {
+  const events: EngineEvent[] = [];
+  return {
+    events,
+    emit(event) {
+      events.push(event);
+    },
+  };
+}
+
+function createEchoBundleOnDisk(dir: string): string {
+  mkdirSync(dir, { recursive: true });
+  const nodeModulesPath = join(import.meta.dir, "../..", "node_modules");
+  const serverCode = `
+const { Server } = require("${nodeModulesPath}/@modelcontextprotocol/sdk/dist/cjs/server/index.js");
+const { StdioServerTransport } = require("${nodeModulesPath}/@modelcontextprotocol/sdk/dist/cjs/server/stdio.js");
+const { ListToolsRequestSchema, CallToolRequestSchema } = require("${nodeModulesPath}/@modelcontextprotocol/sdk/dist/cjs/types.js");
+
+async function main() {
+  const server = new Server({ name: "echo-test", version: "0.1.0" }, { capabilities: { tools: {} } });
+  server.setRequestHandler(ListToolsRequestSchema, async () => ({
+    tools: [{ name: "echo", description: "Echo input", inputSchema: { type: "object" } }],
+  }));
+  server.setRequestHandler(CallToolRequestSchema, async () => ({ content: [{ type: "text", text: "ok" }] }));
+  const transport = new StdioServerTransport();
+  await server.connect(transport);
+}
+main();
+`;
+  writeFileSync(join(dir, "server.cjs"), serverCode);
+  const manifest = {
+    manifest_version: "0.4",
+    name: "@test/echo",
+    version: "1.0.0",
+    description: "Echo test bundle",
+    author: { name: "Test Author" },
+    server: {
+      type: "node",
+      entry_point: "server.cjs",
+      mcp_config: { command: "node", args: ["${__dirname}/server.cjs"] },
+    },
+  };
+  writeFileSync(join(dir, "manifest.json"), JSON.stringify(manifest, null, 2));
+  return dir;
+}
+
+/**
+ * Wire a HealthMonitor exactly the way `startServer` does — single hook
+ * doing (name, wsId) resolution against the lifecycle. Mirroring the
+ * production adapter shape is the point of this test file.
+ */
+function buildAdaptedMonitor(
+  sources: McpSource[],
+  lifecycle: BundleLifecycleManager,
+  eventSink: EventSink,
+): HealthMonitor {
+  return new HealthMonitor(sources, eventSink, {
+    // Short check interval so the test doesn't have to wait the 30s default,
+    // though tests drive `check()` manually below; setting this defensively
+    // in case anyone later switches to timer-driven assertions.
+    checkIntervalMs: 1000,
+    baseDelayMs: 1, // skip the real exponential backoff in restart attempts
+    reportSourceTransition: (source, to) => {
+      const wsId = source.getWorkspaceId();
+      if (!wsId) return;
+      const inst = lifecycle.getInstances().find((i) => i.serverName === source.name && i.wsId === wsId);
+      if (!inst) return;
+      if (to === "crashed") lifecycle.recordCrash(inst.serverName, inst.wsId);
+      else if (to === "running") lifecycle.recordRecovery(inst.serverName, inst.wsId);
+      else lifecycle.recordDead(inst.serverName, inst.wsId);
+    },
+  });
+}
+
+/** Force-mark an McpSource dead so HealthMonitor's `isAlive()` check
+ *  returns false on the next sweep. Mirrors what `transport.onclose`
+ *  does on a real subprocess crash, without the test having to find
+ *  and SIGKILL the child PID.
+ *
+ *  Also stubs `restart()` to fail so the HealthMonitor's restart
+ *  attempt doesn't immediately bounce the source back to healthy
+ *  (which would flip `BundleInstance.state` back to running before
+ *  the test's assertions can observe `crashed`). Production behavior
+ *  on a real fatal crash with no recovery path matches this. */
+function markSourceCrashed(source: McpSource): void {
+  // McpSource keeps `dead` and `restart` accessible; casts here let the
+  // test simulate a transport-level crash plus an irrecoverable bundle
+  // (e.g. config invalid, vendor outage) without dragging in real
+  // subprocess kill semantics or vendor-dependent retry behavior.
+  const internal = source as unknown as {
+    dead: boolean;
+    restart: () => Promise<boolean>;
+  };
+  internal.dead = true;
+  internal.restart = async () => false;
+}
+
+describe("HealthMonitor ↔ BundleLifecycleManager — end-to-end crash chain", () => {
+  let originalWorkDir: string | undefined;
+  let originalWarn: (msg: string) => void;
+  let originalInfo: (msg: string) => void;
+  let warnCalls: string[];
+  let infoCalls: string[];
+
+  beforeEach(() => {
+    setupTestDir();
+    originalWorkDir = process.env.NB_WORK_DIR;
+    process.env.NB_WORK_DIR = testDir;
+    originalWarn = log.warn;
+    originalInfo = log.info;
+    warnCalls = [];
+    infoCalls = [];
+    log.warn = (msg) => {
+      warnCalls.push(msg);
+    };
+    log.info = (msg) => {
+      infoCalls.push(msg);
+    };
+  });
+
+  afterEach(() => {
+    if (originalWorkDir === undefined) delete process.env.NB_WORK_DIR;
+    else process.env.NB_WORK_DIR = originalWorkDir;
+    log.warn = originalWarn;
+    log.info = originalInfo;
+  });
+
+  it(
+    "detected crash flows through reportSourceTransition → recordCrash → transition → warn",
+    async () => {
+      const bundleDir = createEchoBundleOnDisk(join(testDir, "echo-chain"));
+      const registry = new ToolRegistry();
+      const sink = makeSink();
+      const lifecycle = new BundleLifecycleManager(sink, undefined);
+      // Provide the wsId-bearing deps factory so the spawned McpSource
+      // carries a workspaceId. Resolver/rateLimit are never called
+      // because the test never executes a tool call.
+      lifecycle.setBundleMcpDepsFactory((wsId) => ({
+        workspaceId: wsId,
+        // biome-ignore lint/suspicious/noExplicitAny: test stubs, never invoked
+        hostResources: {} as any,
+        // biome-ignore lint/suspicious/noExplicitAny: test stubs, never invoked
+        rateLimit: {} as any,
+      }));
+
+      const instance = await lifecycle.installLocal(bundleDir, registry, "ws_test");
+      expect(instance.state).toBe("running");
+
+      const source = registry.getSources().find((s) => s.name === instance.serverName);
+      expect(source).toBeInstanceOf(McpSource);
+      expect((source as McpSource).getWorkspaceId()).toBe("ws_test");
+
+      const monitor = buildAdaptedMonitor([source as McpSource], lifecycle, sink);
+
+      // Reset any boot-time logs so the assertions below see only the
+      // transition output we're testing.
+      warnCalls.length = 0;
+      infoCalls.length = 0;
+
+      // Simulate the transport-level crash that production HealthMonitor
+      // would see when a subprocess dies (transport.onclose → dead=true).
+      markSourceCrashed(source as McpSource);
+
+      await monitor.check();
+
+      // BundleInstance.state reflects the crash, NOT a stale "running".
+      expect(instance.state).toBe("crashed");
+      // Exactly one operator-facing warn fired (the first-failure signal
+      // — subsequent sweeps must NOT re-warn).
+      expect(warnCalls.length).toBe(1);
+      expect(warnCalls[0]).toContain("crashed");
+      expect(warnCalls[0]).toContain(instance.serverName);
+
+      // Subsequent sweeps with no recovery must not produce additional warns
+      // (record.state guard inside checkOne — already in restarting/dead).
+      await monitor.check();
+      await monitor.check();
+      expect(warnCalls.length).toBe(1);
+
+      await registry.removeSource(instance.serverName);
+    },
+    30_000,
+  );
+
+  it(
+    "same serverName in two workspaces: crash only updates the matching wsId's instance",
+    async () => {
+      const bundleDirA = createEchoBundleOnDisk(join(testDir, "echo-wsA"));
+      const bundleDirB = createEchoBundleOnDisk(join(testDir, "echo-wsB"));
+
+      const registryA = new ToolRegistry();
+      const registryB = new ToolRegistry();
+      const sink = makeSink();
+      const lifecycle = new BundleLifecycleManager(sink, undefined);
+      lifecycle.setBundleMcpDepsFactory((wsId) => ({
+        workspaceId: wsId,
+        // biome-ignore lint/suspicious/noExplicitAny: test stubs, never invoked
+        hostResources: {} as any,
+        // biome-ignore lint/suspicious/noExplicitAny: test stubs, never invoked
+        rateLimit: {} as any,
+      }));
+
+      const instA = await lifecycle.installLocal(bundleDirA, registryA, "ws_alpha");
+      const instB = await lifecycle.installLocal(bundleDirB, registryB, "ws_beta");
+
+      // Both instances share the same serverName because the manifest
+      // names match. The pre-#194 lookup keyed on name alone would
+      // collapse them; the (name, wsId) key keeps them distinct.
+      expect(instA.serverName).toBe(instB.serverName);
+
+      const sourceA = registryA.getSources().find((s) => s.name === instA.serverName) as McpSource;
+      const sourceB = registryB.getSources().find((s) => s.name === instB.serverName) as McpSource;
+      expect(sourceA.getWorkspaceId()).toBe("ws_alpha");
+      expect(sourceB.getWorkspaceId()).toBe("ws_beta");
+
+      const monitor = buildAdaptedMonitor([sourceA, sourceB], lifecycle, sink);
+
+      warnCalls.length = 0;
+      infoCalls.length = 0;
+
+      // Crash ONLY workspace alpha's instance.
+      markSourceCrashed(sourceA);
+
+      await monitor.check();
+
+      expect(instA.state).toBe("crashed");
+      // The cross-workspace bug pre-fix: instB.state would also flip
+      // because the lookup matched on name only and hit whichever
+      // instance came first in iteration. With (name, wsId) keying, B
+      // stays running.
+      expect(instB.state).toBe("running");
+      expect(warnCalls.length).toBe(1);
+
+      await registryA.removeSource(instA.serverName);
+      await registryB.removeSource(instB.serverName);
+    },
+    30_000,
+  );
+});
diff --git a/test/unit/bundle-transition-logging.test.ts b/test/unit/bundle-transition-logging.test.ts
new file mode 100644
index 00000000..b6086c76
--- /dev/null
+++ b/test/unit/bundle-transition-logging.test.ts
@@ -0,0 +1,303 @@
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+import { BundleLifecycleManager } from "../../src/bundles/lifecycle.ts";
+import { log } from "../../src/cli/log.ts";
+import type { EngineEvent, EventSink } from "../../src/engine/types.ts";
+import type { BundleInstance, BundleState } from "../../src/bundles/types.ts";
+
+/**
+ * Edge-triggered logging on BundleInstance.state transitions. Issue #194.
+ *
+ * Per-turn `availableTools()` enumeration of a stuck source must NOT emit
+ * any operator-visible log. The single warn lives at the lifecycle
+ * transition site, fires once per non-broken → broken edge, and a
+ * matching info fires once per broken → running recovery edge.
+ *
+ * Broken set = { dead, crashed, reauth_required }. Excludes pending_auth
+ * (in-flight OAuth — expected during normal Connect) and not_authenticated
+ * (resting state on fresh install / after Disconnect).
+ */
+
+function makeInstance(state: BundleState = "running"): BundleInstance {
+  return {
+    serverName: "test-src",
+    bundleName: "@scope/test",
+    version: "1.0.0",
+    state,
+    trustScore: null,
+    ui: null,
+    briefing: null,
+    httpProxy: null,
+    protected: false,
+    type: "plain",
+    wsId: "ws_test",
+  };
+}
+
+function makeSink(): EventSink & { events: EngineEvent[] } {
+  const events: EngineEvent[] = [];
+  return {
+    events,
+    emit(event) {
+      events.push(event);
+    },
+  };
+}
+
+describe("BundleLifecycleManager.transition — edge-triggered logging", () => {
+  let lifecycle: BundleLifecycleManager;
+  let warnCalls: string[];
+  let infoCalls: string[];
+  let originalWarn: (msg: string) => void;
+  let originalInfo: (msg: string) => void;
+
+  beforeEach(() => {
+    lifecycle = new BundleLifecycleManager(makeSink(), undefined);
+    warnCalls = [];
+    infoCalls = [];
+    originalWarn = log.warn;
+    originalInfo = log.info;
+    log.warn = (msg) => {
+      warnCalls.push(msg);
+    };
+    log.info = (msg) => {
+      infoCalls.push(msg);
+    };
+  });
+
+  afterEach(() => {
+    log.warn = originalWarn;
+    log.info = originalInfo;
+  });
+
+  test("running → dead emits exactly one warn", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "dead");
+    expect(warnCalls.length).toBe(1);
+    expect(warnCalls[0]).toContain("test-src");
+    expect(warnCalls[0]).toContain("dead");
+  });
+
+  test("running → crashed emits one warn", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "crashed");
+    expect(warnCalls.length).toBe(1);
+    expect(warnCalls[0]).toContain("crashed");
+  });
+
+  test("running → reauth_required emits one warn", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "reauth_required");
+    expect(warnCalls.length).toBe(1);
+    expect(warnCalls[0]).toContain("reauth_required");
+  });
+
+  test("repeated dead → dead does NOT warn again (no-op transition)", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "dead");
+    lifecycle.transition(inst, "dead");
+    lifecycle.transition(inst, "dead");
+    expect(warnCalls.length).toBe(1);
+  });
+
+  test("broken → broken (dead → crashed) does NOT warn (already broken)", () => {
+    const inst = makeInstance("dead");
+    lifecycle.transition(inst, "crashed");
+    expect(warnCalls.length).toBe(0);
+  });
+
+  test("running → pending_auth does NOT warn (in-flight OAuth, not broken)", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "pending_auth");
+    expect(warnCalls.length).toBe(0);
+  });
+
+  test("running → not_authenticated does NOT warn (resting state)", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "not_authenticated");
+    expect(warnCalls.length).toBe(0);
+  });
+
+  test("running → starting does NOT warn (transient)", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "starting");
+    expect(warnCalls.length).toBe(0);
+  });
+
+  test("running → stopped does NOT warn (operator intent)", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "stopped");
+    expect(warnCalls.length).toBe(0);
+  });
+
+  test("dead → running emits one info (recovery edge)", () => {
+    const inst = makeInstance("dead");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(1);
+    expect(infoCalls[0]).toContain("recovered");
+    expect(infoCalls[0]).toContain("test-src");
+  });
+
+  test("reauth_required → running emits one info", () => {
+    const inst = makeInstance("reauth_required");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(1);
+  });
+
+  test("crashed → running emits one info", () => {
+    const inst = makeInstance("crashed");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(1);
+  });
+
+  test("broken → non-running (dead → not_authenticated) emits no info", () => {
+    const inst = makeInstance("dead");
+    lifecycle.transition(inst, "not_authenticated");
+    expect(infoCalls.length).toBe(0);
+    expect(warnCalls.length).toBe(0);
+  });
+
+  test("not_authenticated → running emits no info (was not broken)", () => {
+    const inst = makeInstance("not_authenticated");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(0);
+  });
+
+  test("full cycle: running → dead → running → dead = 2 warns + 1 info", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "dead");
+    lifecycle.transition(inst, "running");
+    lifecycle.transition(inst, "dead");
+    expect(warnCalls.length).toBe(2);
+    expect(infoCalls.length).toBe(1);
+  });
+
+  test("instance.state field still updates on every call", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "dead");
+    expect(inst.state).toBe("dead");
+    lifecycle.transition(inst, "running");
+    expect(inst.state).toBe("running");
+    lifecycle.transition(inst, "pending_auth");
+    expect(inst.state).toBe("pending_auth");
+  });
+});
+
+/**
+ * Multi-step recovery via sticky-bit. The direct broken → running edge
+ * is rare in production — URL bundles recover through `pending_auth`
+ * (user clicks Reconnect, completes OAuth) and stdio bundles recover
+ * via a `restarting` intermediate. The sticky `lastBrokenState`
+ * breadcrumb on `BundleInstance` carries the broken signal across
+ * those intermediates so the recovery info still fires on the final
+ * `→ running` hop.
+ */
+describe("BundleLifecycleManager.transition — multi-step recovery (sticky-bit)", () => {
+  let lifecycle: BundleLifecycleManager;
+  let warnCalls: string[];
+  let infoCalls: string[];
+  let originalWarn: (msg: string) => void;
+  let originalInfo: (msg: string) => void;
+
+  beforeEach(() => {
+    lifecycle = new BundleLifecycleManager(makeSink(), undefined);
+    warnCalls = [];
+    infoCalls = [];
+    originalWarn = log.warn;
+    originalInfo = log.info;
+    log.warn = (msg) => {
+      warnCalls.push(msg);
+    };
+    log.info = (msg) => {
+      infoCalls.push(msg);
+    };
+  });
+
+  afterEach(() => {
+    log.warn = originalWarn;
+    log.info = originalInfo;
+  });
+
+  test("URL bundle reconnect: reauth_required → pending_auth → running emits one info", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "reauth_required");
+    lifecycle.transition(inst, "pending_auth");
+    lifecycle.transition(inst, "running");
+    expect(warnCalls.length).toBe(1);
+    expect(infoCalls.length).toBe(1);
+    expect(infoCalls[0]).toContain("reauth_required");
+    expect(infoCalls[0]).toContain("recovered");
+  });
+
+  test("dead → not_authenticated → running still emits info (sticky preserved across intermediate)", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "dead");
+    lifecycle.transition(inst, "not_authenticated");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(1);
+    expect(infoCalls[0]).toContain("dead");
+  });
+
+  test("recovery clears sticky-bit: second running after second-broken episode also fires info", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "dead");
+    lifecycle.transition(inst, "running");
+    lifecycle.transition(inst, "reauth_required");
+    lifecycle.transition(inst, "pending_auth");
+    lifecycle.transition(inst, "running");
+    expect(warnCalls.length).toBe(2);
+    expect(infoCalls.length).toBe(2);
+    expect(infoCalls[0]).toContain("dead");
+    expect(infoCalls[1]).toContain("reauth_required");
+  });
+
+  test("sticky-bit tracks LATEST broken state across broken→broken (crashed → dead → running labels 'dead')", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "crashed");
+    lifecycle.transition(inst, "dead");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(1);
+    expect(infoCalls[0]).toContain("dead");
+  });
+
+  test("instance constructed in broken state: dead → running captures dead as label", () => {
+    // Mirrors HealthMonitor crash recovery on a bundle that came up
+    // dead before any transition flowed through this lifecycle instance.
+    const inst = makeInstance("dead");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(1);
+    expect(infoCalls[0]).toContain("dead");
+  });
+
+  test("instance constructed in broken state: reauth_required → pending_auth → running captures reauth_required", () => {
+    const inst = makeInstance("reauth_required");
+    lifecycle.transition(inst, "pending_auth");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(1);
+    expect(infoCalls[0]).toContain("reauth_required");
+  });
+
+  test("operator-stop clears sticky-bit: dead → stopped → starting → running emits no info", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "dead");
+    lifecycle.transition(inst, "stopped");
+    lifecycle.transition(inst, "starting");
+    lifecycle.transition(inst, "running");
+    expect(warnCalls.length).toBe(1);
+    expect(infoCalls.length).toBe(0);
+  });
+
+  test("starting → running on fresh boot emits no info (never broken)", () => {
+    const inst = makeInstance("starting");
+    lifecycle.transition(inst, "running");
+    expect(infoCalls.length).toBe(0);
+    expect(warnCalls.length).toBe(0);
+  });
+
+  test("lastBrokenState field is undefined after recovery", () => {
+    const inst = makeInstance("running");
+    lifecycle.transition(inst, "dead");
+    expect(inst.lastBrokenState).toBe("dead");
+    lifecycle.transition(inst, "running");
+    expect(inst.lastBrokenState).toBeUndefined();
+  });
+});
diff --git a/test/unit/registry-tool-enumeration-resilience.test.ts b/test/unit/registry-tool-enumeration-resilience.test.ts
index a70356ee..68808ab6 100644
--- a/test/unit/registry-tool-enumeration-resilience.test.ts
+++ b/test/unit/registry-tool-enumeration-resilience.test.ts
@@ -1,4 +1,5 @@
-import { describe, expect, test } from "bun:test";
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+import { log } from "../../src/cli/log.ts";
 import { textContent } from "../../src/engine/content-helpers.ts";
 import type { ToolResult } from "../../src/engine/types.ts";
 import { ToolRegistry } from "../../src/tools/registry.ts";
@@ -77,3 +78,52 @@ describe("ToolRegistry.availableTools — error containment", () => {
     await expect(registry.availableTools()).resolves.toEqual([]);
   });
 });
+
+/**
+ * Issue #194: the per-skip log must not be `log.warn`. A connector
+ * stuck in a broken state would otherwise emit one warn per
+ * `availableTools()` call — once per chat turn — flooding operator
+ * stderr and any alerting that hooks the warn rate. The operator
+ * signal lives at the lifecycle transition site (one warn per
+ * non-broken → broken edge); this site is per-turn and must stay
+ * silent at warn level.
+ */
+describe("ToolRegistry.availableTools — log noise", () => {
+  let warnCalls: string[];
+  let originalWarn: (msg: string) => void;
+
+  beforeEach(() => {
+    warnCalls = [];
+    originalWarn = log.warn;
+    log.warn = (msg) => {
+      warnCalls.push(msg);
+    };
+  });
+
+  afterEach(() => {
+    log.warn = originalWarn;
+  });
+
+  test("repeated enumeration of a broken source emits zero warns", async () => {
+    const registry = new ToolRegistry();
+    registry.addSource(new BrokenSource());
+
+    for (let i = 0; i < 100; i++) {
+      await registry.availableTools();
+    }
+
+    expect(warnCalls.length).toBe(0);
+  });
+
+  test("mixed healthy + broken source enumeration also stays quiet at warn", async () => {
+    const registry = new ToolRegistry();
+    registry.addSource(new HealthySource());
+    registry.addSource(new BrokenSource());
+
+    for (let i = 0; i < 25; i++) {
+      await registry.availableTools();
+    }
+
+    expect(warnCalls.length).toBe(0);
+  });
+});