Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 17 additions & 6 deletions src/Conclave.App/Claude/ClaudeClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,14 @@ public async IAsyncEnumerable<StreamJsonEvent> StreamAsync(
psi.ArgumentList.Add(claudeSessionId);
}

// Spawn off the UI thread: Process.Start with three redirected pipes can block the
// calling thread long enough to swallow the layout pass for the just-appended user
// message, so the user's bubble doesn't paint until claude itself starts streaming.
using var proc = await Task.Run(() => Process.Start(psi), ct)
?? throw new InvalidOperationException("failed to spawn claude");

try
{
// Spawn off the UI thread: Process.Start with three redirected pipes can block the
// calling thread long enough to swallow the layout pass for the just-appended user
// message, so the user's bubble doesn't paint until claude itself starts streaming.
using var proc = await Task.Run(() => Process.Start(psi), ct)
?? throw new InvalidOperationException("failed to spawn claude");

// Collect stderr concurrently so it doesn't fill the pipe and deadlock us.
var stderrBuf = new StringBuilder();
proc.ErrorDataReceived += (_, e) => { if (e.Data != null) stderrBuf.AppendLine(e.Data); };
Expand Down Expand Up @@ -148,6 +148,17 @@ public async IAsyncEnumerable<StreamJsonEvent> StreamAsync(
}
finally
{
// If the caller cancelled (Stop button, auto-resume on a stalled session, app shutdown),
// proactively kill the subprocess + its tree. The pipe-readers respect ct and exit
// promptly, but `using var proc` only disposes our handles — the child claude process
// can keep running until its own pipes break, which on a stalled network can take a
// while. Killing here means StallDetectionService's WaitAsync(5s) reliably observes
// a clean unwind before sending "continue".
if (ct.IsCancellationRequested)
{
try { if (!proc.HasExited) proc.Kill(entireProcessTree: true); }
catch { /* already exited or no permission — best-effort */ }
}
// Best-effort: claude has already read the file by the time it spawns its
// own MCP client, so a delete here is safe. Swallow IO errors — leaving a
// temp file behind is preferable to crashing the turn on cleanup.
Expand Down
44 changes: 40 additions & 4 deletions src/Conclave.App/Claude/ClaudeService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,23 @@ public sealed class ClaudeService

public ClaudeService(SessionManager manager) => _manager = manager;

public async Task RunTurnAsync(SessionVm session, string prompt, CancellationToken ct = default)
public Task RunTurnAsync(SessionVm session, string prompt, CancellationToken ct = default)
{
    // Convenience overload for turns that are not auto-resumes: forwards to the
    // full overload with isAutoResume fixed to false and hands back its Task.
    return RunTurnAsync(session, prompt, isAutoResume: false, ct);
}

public async Task RunTurnAsync(SessionVm session, string prompt, bool isAutoResume, CancellationToken ct = default)
{
// Per-session cancellation. Combined so either the caller's token or the Cancel
// button on the SessionVm kills the turn.
using var internalCts = new CancellationTokenSource();
using var linked = CancellationTokenSource.CreateLinkedTokenSource(ct, internalCts.Token);
session.CancellationSource = internalCts;
// Reset the stall timestamp at turn start so a stalled prior turn doesn't immediately
// re-flag this fresh one before any events have had time to arrive. StallDetectionService
// also clears IsStalled when it sees a turn enter Working — but stamping here closes
// the race where the timer fires between RunTurnAsync's UpdateStatus(Working) and the
// first stream event.
session.LastStreamEventAt = DateTime.UtcNow;
session.IsStalled = false;

// Per-turn permission router. Registered with the shared MCP server so claude
// can call our permission_prompt tool over HTTP; cancelled in finally so a
Expand All @@ -48,6 +58,7 @@ public async Task RunTurnAsync(SessionVm session, string prompt, CancellationTok
Role = MessageRole.User,
Time = Now(),
Content = prompt,
IsAutoResume = isAutoResume,
};
session.AppendTranscript(userMsg);
_manager.PersistMessage(session, userMsg);
Expand Down Expand Up @@ -106,9 +117,15 @@ public async Task RunTurnAsync(SessionVm session, string prompt, CancellationTok
catch (OperationCanceledException)
{
// User cancelled — not an error. Flip status to Idle; do not write an
// "[error]" transcript entry.
Log(session, LogLevel.Wrn, "Turn cancelled by user");
_manager.UpdateStatus(session, SessionStatus.Idle);
// "[error]" transcript entry. When StallDetectionService is the canceller
// (auto-resume case) it sets SuppressNextTurnCompleteNotification so the
// user doesn't see a "turn complete" toast for what's internally a restart.
var suppress = session.SuppressNextTurnCompleteNotification;
session.SuppressNextTurnCompleteNotification = false;
Log(session, LogLevel.Wrn, suppress
? "Turn cancelled for auto-resume after stall"
: "Turn cancelled by user");
_manager.UpdateStatus(session, SessionStatus.Idle, suppressNotification: suppress);
}
catch (Exception ex)
{
Expand All @@ -128,6 +145,12 @@ public async Task RunTurnAsync(SessionVm session, string prompt, CancellationTok
finally
{
session.CancellationSource = null;
session.CurrentTurnTask = null;
// Defensive cleanup: if the turn ended via a real ResultEvent rather than the
// OCE catch (e.g. the network recovered mid-cancel), the OCE handler never ran
// and the auto-resume flag is still set. Clearing here means the next legit
// turn-complete won't get its notification suppressed by a stale flag.
session.SuppressNextTurnCompleteNotification = false;
// Release any in-flight permission prompts so the MCP handler unwinds and
// claude doesn't sit on an orphaned request.
permHandler?.CancelAll();
Expand Down Expand Up @@ -176,6 +199,13 @@ private void Handle(
Dictionary<string, LiveAssistantState> liveByMessageId,
PermissionTurnHandler? permHandler)
{
// StallDetectionService checks how long ago the last event was — stamp on every
// type so a long-running tool (no text deltas for minutes) doesn't false-positive.
// Clearing IsStalled here also lets a session that briefly stalled then recovered
// drop out of needs-attention without manual intervention.
session.LastStreamEventAt = DateTime.UtcNow;
if (session.IsStalled) session.IsStalled = false;

switch (ev)
{
case SystemInitEvent init:
Expand Down Expand Up @@ -215,6 +245,12 @@ private void Handle(
break;

case ResultEvent res:
// Record the result-arrival time before flipping status so StallDetectionService
// can detect the "result landed mid-cancel" race and skip a piggyback auto-resume.
session.LastResultEventAt = DateTime.UtcNow;
// A clean turn-complete resets the auto-resume retry budget so the next stall
// gets the same one-shot it would have on a fresh session.
session.AutoResumeAttempts = 0;
var finalStatus = res.IsError && !IsInterrupt(res)
? SessionStatus.Error
: SessionStatus.Idle;
Expand Down
20 changes: 19 additions & 1 deletion src/Conclave.App/MainWindow.axaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using Avalonia.Controls;
using Conclave.App.Claude;
using Conclave.App.Design;
using Conclave.App.Platform;
using Conclave.App.Sessions;
using Conclave.App.ViewModels;
using Conclave.App.Views.Shell;
Expand All @@ -17,6 +18,7 @@ public partial class MainWindow : Window

private SessionManager? _manager;
private AutoCleanupService? _autoCleanup;
private StallDetectionService? _stallDetection;
private PermissionMcpServer? _permissions;
private ShellVm? _shell;
// Written on the UI thread from Activated/Deactivated, read by NotificationService
Expand Down Expand Up @@ -86,13 +88,28 @@ public MainWindow()
capabilities.BeginProbe();
_shell = new ShellVm(tokens, _manager, capabilities);
StartupLog.Mark("MainWindow ctor: ShellVm built");
_shell.SendRequested += (session, prompt) => claudeService.RunTurnAsync(session, prompt);
// Capture the in-flight task on the SessionVm so StallDetectionService can await
// it cleanly when cancelling for auto-resume. The fire-and-forget shape means the
// returned Task would otherwise be lost.
_shell.SendRequested += (session, prompt) =>
{
var task = claudeService.RunTurnAsync(session, prompt);
session.CurrentTurnTask = task;
return task;
};
_shell.PropertyChanged += OnShellPropertyChanged;

_autoCleanup = new AutoCleanupService(_manager);
_shell.AutoCleanup = _autoCleanup;
_autoCleanup.Start();

// Detect stalled claude turns (e.g. after waking from sleep) and — if the user
// has opted in — silently send "continue" to resume them. Cross-platform wake
// detection via clock-jump heartbeat avoids any OS-specific bindings.
_stallDetection = new StallDetectionService(_manager, claudeService, new HeartbeatPowerService());
_shell.StallDetection = _stallDetection;
_stallDetection.Start();

DataContext = _shell;

Activated += (_, _) => _isWindowActive = true;
Expand All @@ -105,6 +122,7 @@ public MainWindow()
// the MCP listener so an in-flight permission HTTP response can still be
// written back. Closing the listener first races the response onto a closed
// socket and claude sees a connection error instead of a clean deny.
_stallDetection?.Dispose();
_autoCleanup?.Dispose();
_manager?.Dispose();
_permissions?.Dispose();
Expand Down
58 changes: 58 additions & 0 deletions src/Conclave.App/Platform/HeartbeatPowerService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
namespace Conclave.App.Platform;

// Cross-platform wake detector. Runs a low-frequency heartbeat loop and fires DeviceWoke
// whenever a tick lands significantly later than scheduled — a strong signal that the OS
// suspended the process (laptop sleep, Windows hybrid sleep, hibernation). Works on every
// platform without OS-specific bindings, which keeps the AOT story simple and avoids the
// brittle ObjC-block marshalling that NSWorkspaceDidWakeNotification would otherwise need.
//
// Trade-off: there's a small detection latency equal to TickInterval (1.5s). For waking
// from sleep that's imperceptible. False positives can occur if the process is starved of
// CPU for several seconds (debugger pause, severe GC); StallDetectionService treats wake
// events as a hint to scan, not a definitive stall signal, so this is harmless.
public sealed class HeartbeatPowerService : IPlatformPowerService
{
    // How often the heartbeat loop wakes up to compare the wall clock with its schedule.
    private static readonly TimeSpan TickInterval = TimeSpan.FromSeconds(1.5);

    // A tick that takes more than this long beyond TickInterval is treated as a wake.
    // 8s gives generous headroom for "the GC paused us" without missing actual sleeps,
    // which on macOS routinely register as 30s+ jumps even on short lid-closes.
    private static readonly TimeSpan WakeThreshold = TimeSpan.FromSeconds(8);

    private readonly CancellationTokenSource _cts = new();
    private Task? _loop;
    private bool _disposed;

    /// <summary>
    /// Raised from the heartbeat loop (a thread-pool thread, not the UI thread) when a
    /// tick lands more than <see cref="WakeThreshold"/> later than it was scheduled.
    /// </summary>
    public event Action? DeviceWoke;

    /// <summary>
    /// Starts the heartbeat loop. Calling it again while a loop exists is a no-op, and
    /// so is calling it after <see cref="Dispose"/>.
    /// </summary>
    public void Start()
    {
        if (_disposed || _loop is not null) return;

        // Read the token on the calling thread. Reading _cts.Token lazily inside the
        // Task.Run lambda could race with Dispose() and fault the task with an
        // ObjectDisposedException that nobody observes.
        var token = _cts.Token;
        _loop = Task.Run(() => RunAsync(token));
    }

    // Heartbeat loop: sleeps TickInterval at a time and raises DeviceWoke whenever the
    // wall clock shows the tick arrived far later than scheduled.
    private async Task RunAsync(CancellationToken ct)
    {
        var expected = DateTime.UtcNow + TickInterval;
        while (!ct.IsCancellationRequested)
        {
            try { await Task.Delay(TickInterval, ct); }
            catch (OperationCanceledException) { return; }

            var now = DateTime.UtcNow;
            if (now - expected > WakeThreshold)
            {
                RaiseDeviceWoke();
            }
            // Re-base off "now" rather than expected+TickInterval so a single late tick
            // doesn't keep firing wake events for several intervals after a long sleep.
            expected = now + TickInterval;
        }
    }

    // Invokes each subscriber separately. A single multicast Invoke() stops at the first
    // handler that throws, which would silently skip every subscriber registered after it.
    private void RaiseDeviceWoke()
    {
        var handlers = DeviceWoke;
        if (handlers is null) return;

        foreach (var handler in handlers.GetInvocationList())
        {
            try { ((Action)handler)(); }
            catch { /* never let a subscriber kill the loop or starve the other subscribers */ }
        }
    }

    /// <summary>
    /// Cancels the heartbeat loop and releases the cancellation source. Safe to call
    /// more than once; never throws.
    /// </summary>
    public void Dispose()
    {
        if (_disposed) return;
        _disposed = true;

        try { _cts.Cancel(); }
        catch (AggregateException) { /* a cancellation callback threw — Dispose must stay quiet */ }
        _cts.Dispose();
    }
}
11 changes: 11 additions & 0 deletions src/Conclave.App/Platform/IPlatformPowerService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace Conclave.App.Platform;

// Cross-platform OS-power events. v1 only fires DeviceWoke (resume from sleep) — that's
// the signal StallDetectionService cares about, since a sleeping laptop is the dominant
// cause of a stalled claude turn. Implementations may fire the event from a non-UI
// thread; subscribers must marshal to the UI thread before mutating VMs.
// Contract for a source of OS-power events. Extends IDisposable, so the owner is
// expected to dispose the implementation when it is no longer needed.
public interface IPlatformPowerService : IDisposable
{
// Signals a resume from sleep. May be raised on a non-UI thread, so handlers must
// marshal to the UI thread before mutating view models.
event Action? DeviceWoke;
// Begins detection. In HeartbeatPowerService this launches the background heartbeat
// loop; NOTE(review): whether other implementations tolerate repeated calls is not
// specified by this interface — confirm per implementation.
void Start();
}
16 changes: 12 additions & 4 deletions src/Conclave.App/Sessions/Database.cs
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ UPDATE session_worktrees
)
WHERE repo_path = '';
"""),
(12, """
-- Synthetic "continue" prompts that StallDetectionService sends when auto-resuming
-- a stalled session. Hidden in the transcript UI so the conversation reads
-- naturally; the assistant's resumed reply still shows.
ALTER TABLE messages ADD COLUMN is_auto_resume INTEGER NOT NULL DEFAULT 0;
"""),
};

// Explicit column lists so ordinal mapping in Read*() stays stable.
Expand All @@ -126,7 +132,7 @@ UPDATE session_worktrees
"claude_session_id, plan_json, permission_mode, total_cost_usd, pr_merged_at, " +
"pending_preamble";
private const string MessageColumns =
"id, session_id, role, content, tools_json, created_at, seq, claude_uuid";
"id, session_id, role, content, tools_json, created_at, seq, claude_uuid, is_auto_resume";

private Database(SqliteConnection conn) => _conn = conn;

Expand Down Expand Up @@ -426,6 +432,7 @@ public IReadOnlyList<MessageRow> GetMessages(string sessionId)
CreatedAt = r.GetInt64(5),
Seq = r.GetInt32(6),
ClaudeUuid = Str(r, 7),
IsAutoResume = r.GetInt32(8) != 0,
};

public int NextSeq(string sessionId)
Expand All @@ -439,12 +446,13 @@ public int NextSeq(string sessionId)
}

public void InsertMessage(MessageRow m) => Exec(
"INSERT INTO messages (id, session_id, role, content, tools_json, created_at, seq, claude_uuid) " +
"VALUES ($id, $sessionId, $role, $content, $toolsJson, $createdAt, $seq, $claudeUuid);",
"INSERT INTO messages (id, session_id, role, content, tools_json, created_at, seq, claude_uuid, is_auto_resume) " +
"VALUES ($id, $sessionId, $role, $content, $toolsJson, $createdAt, $seq, $claudeUuid, $isAutoResume);",
("$id", m.Id), ("$sessionId", m.SessionId), ("$role", m.Role),
("$content", m.Content), ("$toolsJson", (object?)m.ToolsJson),
("$createdAt", m.CreatedAt), ("$seq", m.Seq),
("$claudeUuid", (object?)m.ClaudeUuid));
("$claudeUuid", (object?)m.ClaudeUuid),
("$isAutoResume", m.IsAutoResume ? 1 : 0));

public void UpdateMessageClaudeUuid(string id, string? uuid) => Exec(
"UPDATE messages SET claude_uuid = $u WHERE id = $id;",
Expand Down
3 changes: 3 additions & 0 deletions src/Conclave.App/Sessions/MessageRow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@ public sealed record MessageRow
// user messages and for messages persisted before this column was added. Captured to
// enable future fork-at-message paths that target claude's own JSONL session storage.
public string? ClaudeUuid { get; init; }
// True for synthetic "continue" prompts injected by StallDetectionService when
// auto-resuming a stalled session. Used to hide the user bubble in the transcript.
public bool IsAutoResume { get; init; }
}
15 changes: 13 additions & 2 deletions src/Conclave.App/Sessions/SessionManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ public void LoadTranscriptIfNeeded(SessionVm s)
.ToLocalTime().ToString("HH:mm"),
Content = row.Content,
ClaudeUuid = row.ClaudeUuid,
IsAutoResume = row.IsAutoResume,
};
if (!string.IsNullOrEmpty(row.ToolsJson))
{
Expand All @@ -380,6 +381,7 @@ public void PersistMessage(SessionVm session, TranscriptMessageVm msg)
CreatedAt = Database.Now(),
Seq = _db.NextSeq(session.Id),
ClaudeUuid = msg.ClaudeUuid,
IsAutoResume = msg.IsAutoResume,
});
}

Expand Down Expand Up @@ -459,11 +461,17 @@ private IEnumerable<ToolCallVm> DeserializeTools(string json)
}
}

public void UpdateStatus(SessionVm s, SessionStatus status)
public void UpdateStatus(SessionVm s, SessionStatus status, bool suppressNotification = false)
{
var previous = s.Status;
_db.UpdateSessionStatus(s.Id, status.ToString());
s.Status = status;
// IsStalled is an overlay on Working/RunningTool — once the session leaves that
// family (user clicked Stop, ResultEvent landed, an exception flipped us to Error),
// the flag is meaningless and would otherwise pin the session in "Needs attention"
// forever and keep the stall banner visible on an idle session.
if (status is not (SessionStatus.Working or SessionStatus.RunningTool) && s.IsStalled)
s.IsStalled = false;
// Only "claude is done" transitions bump the session — intermediate Working /
// RunningTool flips during a turn must not reorder the sidebar.
if (status is SessionStatus.Idle or SessionStatus.Error)
Expand All @@ -472,7 +480,10 @@ public void UpdateStatus(SessionVm s, SessionStatus status)
BumpToTop(s);
// Only notify on a real busy→done transition. App-load resets and session
// creation also pass through here at Idle but stay Idle — those mustn't fire.
if (previous is SessionStatus.Working or SessionStatus.RunningTool)
// suppressNotification is set by StallDetectionService on the cancel-half of
// an auto-resume so the user doesn't get a "turn complete" toast for what is
// really just an internal restart.
if (!suppressNotification && previous is SessionStatus.Working or SessionStatus.RunningTool)
Notifications?.NotifyTurnComplete(s.Title, status == SessionStatus.Error);
}
}
Expand Down
6 changes: 6 additions & 0 deletions src/Conclave.App/Sessions/SettingsKeys.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ public static class SettingsKeys
public const string AutoCleanupDays = "auto_cleanup.days";
public const string NotificationsEnabled = "notifications.enabled";
public const string ClaudeVersion = "claude.version";
public const string AutoResumeStalledSessions = "stall_detection.auto_resume";

public const int DefaultAutoCleanupDays = 7;

Expand All @@ -24,4 +25,9 @@ public static int ReadAutoCleanupDays(Database db)
// explicit "false" disables notifications.
public static bool ReadNotificationsEnabled(Database db)
{
    // Enabled unless the stored value is literally "false" (compared case-insensitively);
    // any other stored value, or a null one, leaves notifications on.
    var stored = db.GetSetting(NotificationsEnabled);
    var explicitlyDisabled = string.Equals(stored, "false", StringComparison.OrdinalIgnoreCase);
    return !explicitlyDisabled;
}

// Default-off: stall detection is always on (sessions get flagged as needs-attention),
// but Conclave only auto-sends "continue" when the user explicitly opts in.
public static bool ReadAutoResumeStalledSessions(Database db)
{
    // Opt-in flag: only a stored "true" (compared case-insensitively) turns it on;
    // any other stored value, or a null one, reads as off.
    var stored = db.GetSetting(AutoResumeStalledSessions);
    var optedIn = string.Equals(stored, "true", StringComparison.OrdinalIgnoreCase);
    return optedIn;
}
}
Loading
Loading