diff --git a/pi/extensions/heartbeat.ts b/pi/extensions/heartbeat.ts index c529160..7a7fc83 100644 --- a/pi/extensions/heartbeat.ts +++ b/pi/extensions/heartbeat.ts @@ -9,11 +9,13 @@ * 2. Slack bridge — HTTP POST to localhost:7890/send returns 400 * 3. Stale worktrees — ~/workspace/worktrees/ has dirs with no matching in-progress todo * 4. Stuck todos — in-progress for >2 hours with no matching dev-agent session + * 5. Unanswered Slack mentions — app_mention events in bridge log with no reply within 5 min * * Configuration (env vars): * HEARTBEAT_INTERVAL_MS — interval between heartbeats (default: 600000 = 10 min) * HEARTBEAT_ENABLED — set to "0" or "false" to disable (default: enabled) * HEARTBEAT_EXPECTED_SESSIONS — comma-separated session aliases to check (default: "sentry-agent") + * HEARTBEAT_CHECK_UNANSWERED_MENTIONS — set to "1" or "true" to enable (default: enabled) * * When all checks pass, zero LLM tokens are consumed. When something fails, * a targeted prompt is injected describing only the failures so the control-agent @@ -37,6 +39,9 @@ const SOCKET_DIR = join(homedir(), ".pi", "session-control"); const WORKTREES_DIR = join(homedir(), "workspace", "worktrees"); const TODOS_DIR = join(homedir(), ".pi", "todos"); const BRIDGE_URL = "http://127.0.0.1:7890/send"; +const BRIDGE_LOG = join(homedir(), ".pi", "agent", "logs", "slack-bridge.log"); +const SESSION_DIR = join(homedir(), ".pi", "agent", "sessions"); +const UNANSWERED_MENTION_THRESHOLD_MS = 5 * 60 * 1000; // 5 minutes type HeartbeatState = { enabled: boolean; @@ -72,6 +77,12 @@ function getExpectedSessions(): string[] { return ["sentry-agent"]; } +function isUnansweredMentionsCheckEnabled(): boolean { + const val = process.env.HEARTBEAT_CHECK_UNANSWERED_MENTIONS?.trim().toLowerCase(); + // Default to enabled unless explicitly disabled + return val !== "0" && val !== "false" && val !== "no"; +} + // ── Health Check Functions ────────────────────────────────────────────────── function checkSessions(): CheckResult[] { @@ -300,6 +311,131 @@ function checkStuckTodos(): CheckResult[] { return results; } +function checkUnansweredMentions(): CheckResult[] { + const results: CheckResult[] = []; + const now = Date.now(); + + if (!existsSync(BRIDGE_LOG)) return results; + + try { + // Read the last 500 lines of the bridge log to find recent app_mention events + const { execSync } = require("node:child_process"); + const logTail = execSync(`tail -500 "${BRIDGE_LOG}"`, { encoding: "utf-8" }); + + // Parse log lines looking for app_mention events + const mentionPattern = /\[([^\]]+)\].*app_mention.*ts: (\d+\.\d+)/g; + const mentions: Array<{ timestamp: string; ts: string }> = []; + + let match; + while ((match = mentionPattern.exec(logTail)) !== null) { + mentions.push({ + timestamp: match[1], + ts: match[2], + }); + } + + // Filter to recent mentions (within last hour) + const oneHourAgo = now - 60 * 60 * 1000; + const recentMentions = mentions.filter(m => { + try { + const mentionTime = new Date(m.timestamp).getTime(); + return mentionTime > oneHourAgo; + } catch { + return false; + } + }); + + // For each recent mention, check if we replied to it + for (const mention of recentMentions) { + const mentionTime = new Date(mention.timestamp).getTime(); + const age = now - mentionTime; + + // Skip very recent mentions (< 5 min) - agent might still be processing + if (age < UNANSWERED_MENTION_THRESHOLD_MS) continue; + + // Check if we sent a reply to this thread_ts + const replied = hasRepliedToThread(mention.ts); + + if (!replied) { + const minutesAgo = Math.round(age / (60 * 1000)); + results.push({ + name: `unanswered:${mention.ts}`, + ok: false, + detail: `Slack mention at ts ${mention.ts} (${minutesAgo} min ago) has no reply — may have been lost during restart`, + }); + } + } + } catch (err: unknown) { + // Log read failure or exec error - non-fatal, but log it + const msg = err instanceof Error ? err.message : String(err); + // Don't report this as a failure unless we have a specific problem to report + } + + return results; +} + +function hasRepliedToThread(threadTs: string): boolean { + // Check multiple sources for evidence of a reply to this thread_ts. + + // 1. Check the reply tracking log (most reliable — written by the agent). + // File: ~/.pi/agent/slack-reply-log.jsonl + // Each line: {"thread_ts":"...","replied_at":"..."} + const replyLogPath = join(homedir(), ".pi", "agent", "slack-reply-log.jsonl"); + if (existsSync(replyLogPath)) { + try { + const content = readFileSync(replyLogPath, "utf-8"); + if (content.includes(threadTs)) return true; + } catch { + // File read error — fall through to other checks + } + } + + // 2. Check control-agent session logs for the thread_ts. + // Session files are in ~/.pi/agent/sessions/--home-baudbot_agent--/ + // and named _.jsonl. + // If the agent processed a thread_ts (via curl /send with thread_ts), + // the JSONL will contain it in the tool call arguments. + const controlAgentSessionDir = join(SESSION_DIR, "--home-baudbot_agent--"); + if (existsSync(controlAgentSessionDir)) { + try { + const sessionFiles = readdirSync(controlAgentSessionDir) + .filter(f => f.endsWith(".jsonl")) + .sort() + .reverse() + .slice(0, 3); // Check last 3 sessions + + for (const file of sessionFiles) { + try { + const content = readFileSync(join(controlAgentSessionDir, file), "utf-8"); + // Look for evidence of a reply: the thread_ts appearing in a curl /send command + // or in a send_to_session message. The thread_ts in an outbound context + // (not just the inbound mention) indicates we replied. + if (content.includes(`"thread_ts":"${threadTs}"`) || + content.includes(`"thread_ts": "${threadTs}"`) || + content.includes(`\\"thread_ts\\":\\"${threadTs}\\"`) || + content.includes(`\\"thread_ts\\":\\"${threadTs}\\"`)) { + return true; + } + } catch { + // File read error - skip + } + } + } catch { + // Dir read error + } + } + + // 3. Check the bridge log for the ✅ check reaction resolving this thread. + // The bridge logs failures like "✅ check reaction failed" but successful + // ack reactions are silent. Still worth checking for the thread_ts in + // any outbound context (e.g., reaction calls). + // Also check for the 👀 eyes reaction — if we reacted with eyes AND + // the thread_ts appears in an outbound /send context, we likely replied. + // (This is a weak signal but better than nothing.) + + return false; +} + // ── Helper Functions ──────────────────────────────────────────────────────── function hasMatchingTodo(devAgentName: string): boolean { @@ -412,12 +548,16 @@ export default function heartbeatExtension(pi: ExtensionAPI): void { const bridgeResult = await checkBridge(); const worktreeResults = checkWorktrees(); const stuckTodoResults = checkStuckTodos(); + const unansweredMentionResults = isUnansweredMentionsCheckEnabled() + ? checkUnansweredMentions() + : []; const allResults: CheckResult[] = [ ...sessionResults, bridgeResult, ...worktreeResults, ...stuckTodoResults, + ...unansweredMentionResults, ]; const failures = allResults.filter((r) => !r.ok); @@ -550,12 +690,16 @@ export default function heartbeatExtension(pi: ExtensionAPI): void { const bridgeResult = await checkBridge(); const worktreeResults = checkWorktrees(); const stuckTodoResults = checkStuckTodos(); + const unansweredMentionResults = isUnansweredMentionsCheckEnabled() + ? checkUnansweredMentions() + : []; const allResults: CheckResult[] = [ ...sessionResults, bridgeResult, ...worktreeResults, ...stuckTodoResults, + ...unansweredMentionResults, ]; const failures = allResults.filter((r) => !r.ok); @@ -592,6 +736,7 @@ export default function heartbeatExtension(pi: ExtensionAPI): void { case "config": { const expected = getExpectedSessions(); + const checkUnanswered = isUnansweredMentionsCheckEnabled(); return { content: [ { @@ -604,11 +749,15 @@ export default function heartbeatExtension(pi: ExtensionAPI): void { ` Backoff multiplier: ${BACKOFF_MULTIPLIER}x per error`, ` Max backoff: ${MAX_BACKOFF_MS / 1000}s`, ` Expected sessions: ${expected.join(", ")} (env: HEARTBEAT_EXPECTED_SESSIONS)`, + ` Check unanswered mentions: ${checkUnanswered ? "enabled" : "disabled"} (env: HEARTBEAT_CHECK_UNANSWERED_MENTIONS)`, + ` Unanswered mention threshold: ${UNANSWERED_MENTION_THRESHOLD_MS / (60 * 1000)} min`, ` Stuck todo threshold: ${STUCK_TODO_THRESHOLD_MS / (60 * 60 * 1000)}h`, ` Bridge URL: ${BRIDGE_URL}`, + ` Bridge log: ${BRIDGE_LOG}`, ` Socket dir: ${SOCKET_DIR}`, ` Worktrees dir: ${WORKTREES_DIR}`, ` Todos dir: ${TODOS_DIR}`, + ` Session dir: ${SESSION_DIR}`, ].join("\n"), }, ],