Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions PolyPilot.Tests/ProcessingWatchdogTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3096,4 +3096,41 @@ public void WatchdogCaseB_UsesFileInfoForSizeAndTime_InSource()
Assert.True(watchdogBody.Contains("fileInfo.Length"),
"Case B must read Length from FileInfo");
}

/// <summary>
/// When HasUsedToolsThisTurn is true and ActiveToolCallCount is 0 but events.jsonl
/// is fresh (the CLI is still writing), the watchdog must upgrade effectiveTimeout
/// from WatchdogUsedToolsIdleTimeoutSeconds (180s) to WatchdogToolExecutionTimeoutSeconds (600s).
/// This prevents premature completion of sessions where the SDK failed to deliver
/// ToolExecutionStartEvent for an in-flight tool.
/// </summary>
/// <remarks>
/// This is a source-inspection test: it reads CopilotService.Events.cs as text and
/// asserts on the literal statements inside RunProcessingWatchdogAsync rather than
/// executing the watchdog. Every marker lookup is guarded so that a refactor which
/// renames or moves a marker fails with a descriptive assertion message instead of
/// an ArgumentOutOfRangeException thrown by IndexOf/Substring.
/// </remarks>
[Fact]
public void Watchdog_UsedToolsTimeout_UpgradesToToolTimeout_WhenEventsJsonlFresh()
{
    const string freshnessGate =
        "if (useUsedToolsTimeout && !IsDemoMode && !IsRemoteMode && startedAt.HasValue)";
    const string freshnessGatePrefix = "if (useUsedToolsTimeout && !IsDemoMode";
    const string timeoutCheck = "if (elapsed >= effectiveTimeout)";

    var source = File.ReadAllText(
        Path.Combine(GetRepoRoot(), "PolyPilot", "Services", "CopilotService.Events.cs"));

    // Source text is not linguistic data: search ordinally so the result does not
    // depend on the culture of the machine running the tests.
    var methodIdx = source.IndexOf(
        "private async Task RunProcessingWatchdogAsync", StringComparison.Ordinal);
    Assert.True(methodIdx >= 0,
        "RunProcessingWatchdogAsync was not found in CopilotService.Events.cs");

    // The watchdog body is taken to end at the next ConcurrentDictionary field declaration.
    var endIdx = source.IndexOf(
        " private readonly ConcurrentDictionary", methodIdx, StringComparison.Ordinal);
    Assert.True(endIdx > methodIdx,
        "End marker (ConcurrentDictionary field) was not found after RunProcessingWatchdogAsync");

    var watchdogBody = source.Substring(methodIdx, endIdx - methodIdx);

    // The freshness check must gate on useUsedToolsTimeout (the 180s tier)
    Assert.Contains(freshnessGate, watchdogBody);

    // Must check that the file was written AFTER this turn started (prevents prior-turn false positives)
    Assert.Contains("lastWrite > startedAt.Value", watchdogBody);

    // Must check file age is within the Case B freshness window
    Assert.Contains("fileAge < WatchdogCaseBFreshnessSeconds", watchdogBody);

    // CRITICAL: must directly assign effectiveTimeout, NOT mutate boolean flags
    // (the boolean flags are already consumed by the effectiveTimeout computation above)
    Assert.Contains("effectiveTimeout = WatchdogToolExecutionTimeoutSeconds", watchdogBody);

    // Isolate the text between the freshness gate and the timeout comparison that follows it.
    var gateIdx = watchdogBody.IndexOf(freshnessGatePrefix, StringComparison.Ordinal);
    Assert.True(gateIdx >= 0, "Freshness gate was not found in the watchdog body");

    var freshnessBlock = watchdogBody.Substring(gateIdx);
    var timeoutCheckIdx = freshnessBlock.IndexOf(timeoutCheck, StringComparison.Ordinal);
    Assert.True(timeoutCheckIdx >= 0,
        "The freshness gate must appear before the 'elapsed >= effectiveTimeout' check");
    freshnessBlock = freshnessBlock.Substring(0, timeoutCheckIdx);

    // Must NOT contain dead-store flag mutations (the original no-op bug)
    Assert.DoesNotContain("useUsedToolsTimeout = false", freshnessBlock);
    Assert.DoesNotContain("useToolTimeout = true", freshnessBlock);
}
}
40 changes: 39 additions & 1 deletion PolyPilot/Services/CopilotService.Events.cs
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,19 @@ private void TryResolveDeferredIdleAfterBackgroundTaskChange(SessionState state,
? "only carry-over shell tasks remain"
: "background task set is now empty";
Debug($"[IDLE-DEFER-RESOLVE] '{sessionName}' {reason} — completing deferred turn");
var resolveGen = Interlocked.Read(ref state.ProcessingGeneration);
InvokeOnUI(() =>
{
if (state.IsOrphaned || !state.HasDeferredIdle || !state.Info.IsProcessing)
return;
if (Interlocked.Read(ref state.ProcessingGeneration) != resolveGen)
{
Debug($"[IDLE-DEFER-RESOLVE] '{sessionName}' skipped — generation mismatch " +
$"(captured={resolveGen}, current={Interlocked.Read(ref state.ProcessingGeneration)})");
return;
}

CompleteResponse(state);
CompleteResponse(state, resolveGen);
});
}

Expand Down Expand Up @@ -2659,6 +2666,37 @@ private async Task RunProcessingWatchdogAsync(SessionState state, string session
? (DateTime.UtcNow - startedAt.Value).TotalSeconds
: 0;

// When tools were used this turn but ActiveToolCallCount is 0, the SDK may have
// failed to deliver ToolExecutionStartEvent for an in-flight tool (events only
// appear in events.jsonl, not the live stream). Check events.jsonl freshness:
// if the CLI wrote recently (within the Case B freshness window AND after this
// turn started), upgrade the effective timeout to 600s so the session isn't
// prematurely completed at 180s while the CLI is still executing a long tool.
if (useUsedToolsTimeout && !IsDemoMode && !IsRemoteMode && startedAt.HasValue)
{
try
{
var sid = state.Info.SessionId;
if (!string.IsNullOrEmpty(sid))
{
var ep = Path.Combine(SessionStatePath, sid, "events.jsonl");
if (File.Exists(ep))
{
var lastWrite = File.GetLastWriteTimeUtc(ep);
var fileAge = (DateTime.UtcNow - lastWrite).TotalSeconds;
if (lastWrite > startedAt.Value && fileAge < WatchdogCaseBFreshnessSeconds)
{
// CLI wrote to events.jsonl after this turn started and within
// the freshness window — a tool is likely still running but the
// SDK didn't deliver the start event.
effectiveTimeout = WatchdogToolExecutionTimeoutSeconds;
}
}
}
}
catch { /* filesystem errors → keep original timeout */ }
}

if (elapsed >= effectiveTimeout)
{
// Defensive: if ProcessingStartedAt is null while IsProcessing is true,
Expand Down