From 060db0ff710ce36a65711c5aca04cb02c1e123ae Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Thu, 2 Apr 2026 15:26:20 -0500 Subject: [PATCH 1/4] fix: smart turn-completion detection eliminates 600s delay for clean turns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IsSessionStillProcessing now checks whether assistant.turn_end or assistant.message is followed by tool.execution_start in the last N events. If no tools are pending, the turn completed cleanly and the session is treated as idle — no need to wait 600s for the watchdog. This eliminates the false "Working..." spinner after relaunch for sessions that finished a clean turn (no tools) while PolyPilot was down. Sessions with pending tool calls still correctly get the 600s watchdog timeout. New GetTailEventTypes helper reads last 8KB of events.jsonl and returns event types in reverse order for efficient tail analysis. 3 new tests: - TurnEndWithoutPendingTools_ReturnsFalse - TurnEndWithPendingTools_ReturnsTrue - MessageWithContentNoTools_ReturnsFalse Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/StuckSessionRecoveryTests.cs | 82 ++++++++++++++++++- .../Services/CopilotService.Utilities.cs | 81 ++++++++++++++++-- 2 files changed, 153 insertions(+), 10 deletions(-) diff --git a/PolyPilot.Tests/StuckSessionRecoveryTests.cs b/PolyPilot.Tests/StuckSessionRecoveryTests.cs index 805f62d01..a5fe84d47 100644 --- a/PolyPilot.Tests/StuckSessionRecoveryTests.cs +++ b/PolyPilot.Tests/StuckSessionRecoveryTests.cs @@ -303,8 +303,6 @@ public void StalenessThreshold_UsesWatchdogToolExecutionTimeout() [InlineData("assistant.reasoning")] [InlineData("assistant.reasoning_delta")] [InlineData("assistant.intent")] - [InlineData("assistant.turn_end")] // between tool rounds — still processing - [InlineData("assistant.message")] // mid-turn text — still processing [InlineData("tool.execution_end")] // tool just completed — still processing public void IsSessionStillProcessing_AllActiveEventTypes_ReturnTrue(string eventType) { @@ -353,6 +351,86 @@ public void IsSessionStillProcessing_TerminalEventTypes_ReturnFalse(string event } } + // --- Smart completion: assistant.turn_end/message without pending tools --- + + [Fact] + public void IsSessionStillProcessing_TurnEndWithoutPendingTools_ReturnsFalse() + { + // Turn completed cleanly — no tool.execution_start after the last turn_end. + var svc = CreateService(); + var tmpDir = Path.Combine(Path.GetTempPath(), "polypilot-test-" + Guid.NewGuid().ToString("N")); + var sessionId = Guid.NewGuid().ToString(); + var sessionDir = Path.Combine(tmpDir, sessionId); + Directory.CreateDirectory(sessionDir); + var eventsFile = Path.Combine(sessionDir, "events.jsonl"); + + try + { + File.WriteAllText(eventsFile, + """{"type":"assistant.turn_start","data":{}}""" + "\n" + + """{"type":"assistant.message","data":{"content":"hello"}}""" + "\n" + + """{"type":"assistant.turn_end","data":{}}"""); + var result = svc.IsSessionStillProcessing(sessionId, tmpDir); + Assert.False(result, "Clean turn_end without pending tools should not report still processing"); + } + finally { Directory.Delete(tmpDir, true); } + } + + [Fact] + public void IsSessionStillProcessing_TurnEndWithPendingTools_ReturnsTrue() + { + // Turn ended between tool rounds — tool.execution_start is pending. + var svc = CreateService(); + var tmpDir = Path.Combine(Path.GetTempPath(), "polypilot-test-" + Guid.NewGuid().ToString("N")); + var sessionId = Guid.NewGuid().ToString(); + var sessionDir = Path.Combine(tmpDir, sessionId); + Directory.CreateDirectory(sessionDir); + var eventsFile = Path.Combine(sessionDir, "events.jsonl"); + + try + { + File.WriteAllText(eventsFile, + """{"type":"assistant.turn_start","data":{}}""" + "\n" + + """{"type":"assistant.message","data":{}}""" + "\n" + + """{"type":"tool.execution_start","data":{}}""" + "\n" + + """{"type":"tool.execution_complete","data":{}}""" + "\n" + + """{"type":"assistant.turn_end","data":{}}""" + "\n" + + """{"type":"assistant.turn_start","data":{}}""" + "\n" + + """{"type":"assistant.message","data":{}}""" + "\n" + + """{"type":"tool.execution_start","data":{}}""" + "\n" + + """{"type":"tool.execution_complete","data":{}}""" + "\n" + + """{"type":"assistant.turn_end","data":{}}""" + "\n" + + """{"type":"assistant.turn_start","data":{}}""" + "\n" + + """{"type":"assistant.message","data":{}}""" + "\n" + + """{"type":"tool.execution_start","data":{}}"""); + var result = svc.IsSessionStillProcessing(sessionId, tmpDir); + Assert.True(result, "turn_end followed by tool.execution_start should report still processing"); + } + finally { Directory.Delete(tmpDir, true); } + } + + [Fact] + public void IsSessionStillProcessing_MessageWithContentNoTools_ReturnsFalse() + { + // Final assistant.message with content, no pending tools — session done. + var svc = CreateService(); + var tmpDir = Path.Combine(Path.GetTempPath(), "polypilot-test-" + Guid.NewGuid().ToString("N")); + var sessionId = Guid.NewGuid().ToString(); + var sessionDir = Path.Combine(tmpDir, sessionId); + Directory.CreateDirectory(sessionDir); + var eventsFile = Path.Combine(sessionDir, "events.jsonl"); + + try + { + File.WriteAllText(eventsFile, + """{"type":"assistant.turn_start","data":{}}""" + "\n" + + """{"type":"assistant.message","data":{"content":"Here is your answer."}}"""); + var result = svc.IsSessionStillProcessing(sessionId, tmpDir); + Assert.False(result, "assistant.message without pending tools should not report still processing"); + } + finally { Directory.Delete(tmpDir, true); } + } + // --- Resume branch selection: RESUME-ACTIVE vs RESUME-QUIESCE --- [Fact] diff --git a/PolyPilot/Services/CopilotService.Utilities.cs b/PolyPilot/Services/CopilotService.Utilities.cs index 74c84f06f..fbac19940 100644 --- a/PolyPilot/Services/CopilotService.Utilities.cs +++ b/PolyPilot/Services/CopilotService.Utilities.cs @@ -136,15 +136,38 @@ internal bool IsSessionStillProcessing(string sessionId, string basePath) var type = doc.RootElement.GetProperty("type").GetString(); if (type == null) return false; // Corrupt/partial event — treat as terminal - // Use a blacklist of terminal events rather than a whitelist of active ones. - // Any event that is NOT terminal means the session is still processing. - // The old whitelist missed intermediate states like assistant.turn_end (between - // tool rounds), assistant.message, and tool.execution_complete, causing - // actively-processing sessions to be incorrectly detected as idle on restore. - // session.idle is ephemeral (never on disk). session.start means session was - // created but never used — not actively processing. All are non-active states. + // Definitive non-processing states (session.idle is ephemeral — never on disk, + // but included for correctness if the CLI ever changes). var terminalEvents = new[] { "session.idle", "session.error", "session.shutdown", "session.start" }; - return !terminalEvents.Contains(type); + if (terminalEvents.Contains(type)) return false; + + // Smart completion: if the last event is assistant.turn_end or assistant.message, + // check whether any tool.execution_start is pending (unmatched by a subsequent + // turn_end). If no tools are pending, the turn completed cleanly — the session + // is idle even without session.idle on disk. + // This eliminates the 600s watchdog delay for clean turn completions (no tools). + if (type is "assistant.turn_end" or "assistant.message") + { + var tailTypes = GetTailEventTypes(eventsFile, 10); + if (tailTypes != null) + { + // Walk backwards: if we find tool.execution_start before hitting + // assistant.turn_end or session.resume, tools are still pending. + bool hasPendingTools = false; + foreach (var evt in tailTypes) + { + if (evt == "tool.execution_start") { hasPendingTools = true; break; } + if (evt is "assistant.turn_end" or "session.resume") break; + } + if (!hasPendingTools) + { + Debug($"[RESTORE] events.jsonl for '{sessionId}' ends with {type} and no pending tools — treating as idle"); + return false; + } + } + } + + return true; } catch { return false; } } @@ -186,6 +209,48 @@ internal bool IsSessionStillProcessing(string sessionId, string basePath) } } + /// + /// Reads the last N event types from events.jsonl in reverse order (most recent first). + /// Uses a tail-read (last 8KB) to avoid full-file scan on large sessions. + /// Returns null on error. + /// + internal static List? GetTailEventTypes(string eventsFilePath, int count) + { + try + { + if (!File.Exists(eventsFilePath)) return null; + + const int tailBytes = 8192; + using var fs = new FileStream(eventsFilePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite); + if (fs.Length == 0) return null; + + var offset = Math.Max(0, fs.Length - tailBytes); + fs.Seek(offset, SeekOrigin.Begin); + using var reader = new StreamReader(fs); + var lines = new List(); + while (reader.ReadLine() is { } line) + { + if (!string.IsNullOrWhiteSpace(line)) + lines.Add(line); + } + + // Parse types in reverse order (most recent first) + var result = new List(); + for (int i = lines.Count - 1; i >= 0 && result.Count < count; i--) + { + try + { + using var doc = JsonDocument.Parse(lines[i]); + var t = doc.RootElement.GetProperty("type").GetString(); + if (t != null) result.Add(t); + } + catch { } + } + return result; + } + catch { return null; } + } + /// /// Checks whether events.jsonl shows a session that was interrupted mid-tool-execution. /// Returns true if the last non-control events are tool.execution_start without matching From f52ab0172d9fe6df336554a8e0f6a2824e47191d Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Thu, 2 Apr 2026 15:45:02 -0500 Subject: [PATCH 2/4] =?UTF-8?q?fix:=20address=20PR=20#475=20R1=20review=20?= =?UTF-8?q?=E2=80=94=20cleaner=20smart=20completion=20+=20missing=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Move assistant.turn_end to terminal events (was dead code in the smart scan — the backward loop immediately broke on itself). 2. Smart scan now only runs for assistant.message, skipping tailTypes[0] (the message itself) and scanning earlier events for pending tools. 3. Increased scan window from 10 to 30 events for event-dense turns. 4. Added missing test: MessageWithPendingToolInSameTurn_ReturnsTrue — the key scenario where the smart scan actually fires and matters. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/StuckSessionRecoveryTests.cs | 28 ++++++++++++++++- .../Services/CopilotService.Utilities.cs | 30 +++++++++---------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/PolyPilot.Tests/StuckSessionRecoveryTests.cs b/PolyPilot.Tests/StuckSessionRecoveryTests.cs index a5fe84d47..2b2040ae8 100644 --- a/PolyPilot.Tests/StuckSessionRecoveryTests.cs +++ b/PolyPilot.Tests/StuckSessionRecoveryTests.cs @@ -329,7 +329,8 @@ public void IsSessionStillProcessing_AllActiveEventTypes_ReturnTrue(string event [InlineData("session.idle")] [InlineData("session.error")] [InlineData("session.shutdown")] - [InlineData("session.start")] // created but never used — not actively processing + [InlineData("session.start")] // created but never used — not actively processing + [InlineData("assistant.turn_end")] // turn finished — clean completion public void IsSessionStillProcessing_TerminalEventTypes_ReturnFalse(string eventType) { var svc = CreateService(); @@ -431,6 +432,31 @@ public void IsSessionStillProcessing_MessageWithContentNoTools_ReturnsFalse() finally { Directory.Delete(tmpDir, true); } } + [Fact] + public void IsSessionStillProcessing_MessageWithPendingToolInSameTurn_ReturnsTrue() + { + // assistant.message as last event, but tool.execution_start earlier in same turn + // → smart scan should detect the pending tool and report still processing. + var svc = CreateService(); + var tmpDir = Path.Combine(Path.GetTempPath(), "polypilot-test-" + Guid.NewGuid().ToString("N")); + var sessionId = Guid.NewGuid().ToString(); + var sessionDir = Path.Combine(tmpDir, sessionId); + Directory.CreateDirectory(sessionDir); + var eventsFile = Path.Combine(sessionDir, "events.jsonl"); + + try + { + File.WriteAllText(eventsFile, + """{"type":"assistant.turn_start","data":{}}""" + "\n" + + """{"type":"assistant.message","data":{}}""" + "\n" + + """{"type":"tool.execution_start","data":{}}""" + "\n" + + """{"type":"assistant.message","data":{"content":"Running the tool now..."}}"""); + var result = svc.IsSessionStillProcessing(sessionId, tmpDir); + Assert.True(result, "assistant.message with pending tool.execution_start should report still processing"); + } + finally { Directory.Delete(tmpDir, true); } + } + // --- Resume branch selection: RESUME-ACTIVE vs RESUME-QUIESCE --- [Fact] diff --git a/PolyPilot/Services/CopilotService.Utilities.cs b/PolyPilot/Services/CopilotService.Utilities.cs index fbac19940..52d40f0a3 100644 --- a/PolyPilot/Services/CopilotService.Utilities.cs +++ b/PolyPilot/Services/CopilotService.Utilities.cs @@ -136,28 +136,28 @@ internal bool IsSessionStillProcessing(string sessionId, string basePath) var type = doc.RootElement.GetProperty("type").GetString(); if (type == null) return false; // Corrupt/partial event — treat as terminal - // Definitive non-processing states (session.idle is ephemeral — never on disk, - // but included for correctness if the CLI ever changes). - var terminalEvents = new[] { "session.idle", "session.error", "session.shutdown", "session.start" }; + // session.idle is ephemeral (never on disk). assistant.turn_end means the + // sub-turn finished — if it's the last event, the session completed cleanly. + var terminalEvents = new[] { "session.idle", "session.error", "session.shutdown", "session.start", "assistant.turn_end" }; if (terminalEvents.Contains(type)) return false; - // Smart completion: if the last event is assistant.turn_end or assistant.message, - // check whether any tool.execution_start is pending (unmatched by a subsequent - // turn_end). If no tools are pending, the turn completed cleanly — the session - // is idle even without session.idle on disk. + // Smart completion for assistant.message: check whether any tool.execution_start + // is pending in the recent event history. If the model wrote a message and no + // tool was started after it, the turn completed cleanly. // This eliminates the 600s watchdog delay for clean turn completions (no tools). - if (type is "assistant.turn_end" or "assistant.message") + if (type is "assistant.message") { - var tailTypes = GetTailEventTypes(eventsFile, 10); - if (tailTypes != null) + var tailTypes = GetTailEventTypes(eventsFile, 30); + if (tailTypes != null && tailTypes.Count > 1) { - // Walk backwards: if we find tool.execution_start before hitting - // assistant.turn_end or session.resume, tools are still pending. + // Skip tailTypes[0] (the assistant.message we already know about). + // Walk backwards through earlier events: if we find tool.execution_start + // before hitting assistant.turn_end or session.resume, tools are pending. bool hasPendingTools = false; - foreach (var evt in tailTypes) + for (int i = 1; i < tailTypes.Count; i++) { - if (evt == "tool.execution_start") { hasPendingTools = true; break; } - if (evt is "assistant.turn_end" or "session.resume") break; + if (tailTypes[i] == "tool.execution_start") { hasPendingTools = true; break; } + if (tailTypes[i] is "assistant.turn_end" or "session.resume") break; } if (!hasPendingTools) { From c96ef80764820683647466772fcb131c0d5de5ba Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Fri, 3 Apr 2026 13:36:38 -0500 Subject: [PATCH 3/4] fix: handle turn_end after tool rounds Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- PolyPilot.Tests/StuckSessionRecoveryTests.cs | 33 +++++++++-- .../Services/CopilotService.Utilities.cs | 55 +++++++++++-------- 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/PolyPilot.Tests/StuckSessionRecoveryTests.cs b/PolyPilot.Tests/StuckSessionRecoveryTests.cs index 2b2040ae8..c3508d1bc 100644 --- a/PolyPilot.Tests/StuckSessionRecoveryTests.cs +++ b/PolyPilot.Tests/StuckSessionRecoveryTests.cs @@ -330,7 +330,6 @@ public void IsSessionStillProcessing_AllActiveEventTypes_ReturnTrue(string event [InlineData("session.error")] [InlineData("session.shutdown")] [InlineData("session.start")] // created but never used — not actively processing - [InlineData("assistant.turn_end")] // turn finished — clean completion public void IsSessionStillProcessing_TerminalEventTypes_ReturnFalse(string eventType) { var svc = CreateService(); @@ -380,7 +379,8 @@ public void IsSessionStillProcessing_TurnEndWithoutPendingTools_ReturnsFalse() [Fact] public void IsSessionStillProcessing_TurnEndWithPendingTools_ReturnsTrue() { - // Turn ended between tool rounds — tool.execution_start is pending. + // Turn ended after a tool round. This can still be between rounds, so restore + // must stay conservative and keep the session active until the next events arrive. var svc = CreateService(); var tmpDir = Path.Combine(Path.GetTempPath(), "polypilot-test-" + Guid.NewGuid().ToString("N")); var sessionId = Guid.NewGuid().ToString(); @@ -395,17 +395,38 @@ public void IsSessionStillProcessing_TurnEndWithPendingTools_ReturnsTrue() """{"type":"assistant.message","data":{}}""" + "\n" + """{"type":"tool.execution_start","data":{}}""" + "\n" + """{"type":"tool.execution_complete","data":{}}""" + "\n" + - """{"type":"assistant.turn_end","data":{}}""" + "\n" + + """{"type":"assistant.turn_end","data":{}}"""); + var result = svc.IsSessionStillProcessing(sessionId, tmpDir); + Assert.True(result, "turn_end after tool activity in the same sub-turn should report still processing"); + } + finally { Directory.Delete(tmpDir, true); } + } + + [Fact] + public void IsSessionStillProcessing_TurnEndNoCurrentToolsAfterEarlierToolRound_ReturnsFalse() + { + // Older tool rounds must not leak across assistant.turn_start boundaries and keep a + // later clean no-tool turn alive. + var svc = CreateService(); + var tmpDir = Path.Combine(Path.GetTempPath(), "polypilot-test-" + Guid.NewGuid().ToString("N")); + var sessionId = Guid.NewGuid().ToString(); + var sessionDir = Path.Combine(tmpDir, sessionId); + Directory.CreateDirectory(sessionDir); + var eventsFile = Path.Combine(sessionDir, "events.jsonl"); + + try + { + File.WriteAllText(eventsFile, """{"type":"assistant.turn_start","data":{}}""" + "\n" + """{"type":"assistant.message","data":{}}""" + "\n" + """{"type":"tool.execution_start","data":{}}""" + "\n" + """{"type":"tool.execution_complete","data":{}}""" + "\n" + """{"type":"assistant.turn_end","data":{}}""" + "\n" + """{"type":"assistant.turn_start","data":{}}""" + "\n" + - """{"type":"assistant.message","data":{}}""" + "\n" + - """{"type":"tool.execution_start","data":{}}"""); + """{"type":"assistant.message","data":{"content":"final answer"}}""" + "\n" + + """{"type":"assistant.turn_end","data":{}}"""); var result = svc.IsSessionStillProcessing(sessionId, tmpDir); - Assert.True(result, "turn_end followed by tool.execution_start should report still processing"); + Assert.False(result, "A clean no-tool turn_end should not be kept active by tools from an earlier round"); } finally { Directory.Delete(tmpDir, true); } } diff --git a/PolyPilot/Services/CopilotService.Utilities.cs b/PolyPilot/Services/CopilotService.Utilities.cs index 52d40f0a3..5db6491cf 100644 --- a/PolyPilot/Services/CopilotService.Utilities.cs +++ b/PolyPilot/Services/CopilotService.Utilities.cs @@ -136,34 +136,23 @@ internal bool IsSessionStillProcessing(string sessionId, string basePath) var type = doc.RootElement.GetProperty("type").GetString(); if (type == null) return false; // Corrupt/partial event — treat as terminal - // session.idle is ephemeral (never on disk). assistant.turn_end means the - // sub-turn finished — if it's the last event, the session completed cleanly. - var terminalEvents = new[] { "session.idle", "session.error", "session.shutdown", "session.start", "assistant.turn_end" }; + // session.idle is ephemeral (never on disk). session.start can also be the only + // on-disk event for a never-used session, so treat it as terminal on restore. + var terminalEvents = new[] { "session.idle", "session.error", "session.shutdown", "session.start" }; if (terminalEvents.Contains(type)) return false; - // Smart completion for assistant.message: check whether any tool.execution_start - // is pending in the recent event history. If the model wrote a message and no - // tool was started after it, the turn completed cleanly. - // This eliminates the 600s watchdog delay for clean turn completions (no tools). - if (type is "assistant.message") + // Smart completion for assistant.message / assistant.turn_end: session.idle is not + // written to disk, so a clean no-tool turn can appear to end on one of these events. + // Only treat it as idle if the current sub-turn shows no tool activity at all. + // If tools were involved, stay conservative and keep waiting for resumed events or + // the watchdog timeout — this avoids losing between-round tool output after relaunch. + if (type is "assistant.message" or "assistant.turn_end") { var tailTypes = GetTailEventTypes(eventsFile, 30); - if (tailTypes != null && tailTypes.Count > 1) + if (tailTypes != null && tailTypes.Count > 1 && IsCleanNoToolSubturn(tailTypes)) { - // Skip tailTypes[0] (the assistant.message we already know about). - // Walk backwards through earlier events: if we find tool.execution_start - // before hitting assistant.turn_end or session.resume, tools are pending. - bool hasPendingTools = false; - for (int i = 1; i < tailTypes.Count; i++) - { - if (tailTypes[i] == "tool.execution_start") { hasPendingTools = true; break; } - if (tailTypes[i] is "assistant.turn_end" or "session.resume") break; - } - if (!hasPendingTools) - { - Debug($"[RESTORE] events.jsonl for '{sessionId}' ends with {type} and no pending tools — treating as idle"); - return false; - } + Debug($"[RESTORE] events.jsonl for '{sessionId}' ends with {type} and the current sub-turn has no tool activity — treating session as idle"); + return false; } } @@ -251,6 +240,26 @@ internal bool IsSessionStillProcessing(string sessionId, string basePath) catch { return null; } } + /// + /// Returns true when the current sub-turn contains no tool activity before the latest + /// assistant.message / assistant.turn_end. The scan stops at the start of the current + /// sub-turn so older tool rounds don't keep a clean completion alive. + /// + internal static bool IsCleanNoToolSubturn(List tailTypes) + { + for (int i = 1; i < tailTypes.Count; i++) + { + var evt = tailTypes[i]; + if (evt.StartsWith("tool.execution_", StringComparison.Ordinal)) + return false; + + if (evt is "assistant.turn_start" or "assistant.turn_end" or "session.resume" or "session.start") + break; + } + + return true; + } + /// /// Checks whether events.jsonl shows a session that was interrupted mid-tool-execution. /// Returns true if the last non-control events are tool.execution_start without matching From 6b91caf675624948e49f9ea4edfd93fe8bd51f3b Mon Sep 17 00:00:00 2001 From: Shane Neuville Date: Fri, 3 Apr 2026 13:58:56 -0500 Subject: [PATCH 4/4] docs: update skills and instructions with IDLE-DEFER-REARM, smart completion, model selection - multi-agent-orchestration SKILL.md: add IDLE-DEFER-REARM subsection, session.idle ephemeral note, INV-O17 invariant, troubleshooting row - copilot-instructions.md: add IDLE-DEFER-REARM, smart completion scan section, model selection priority chain with ShouldAcceptObservedModel Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../skills/multi-agent-orchestration/SKILL.md | 55 +++++++++++++++++++ .github/copilot-instructions.md | 8 +++ 2 files changed, 63 insertions(+) diff --git a/.claude/skills/multi-agent-orchestration/SKILL.md b/.claude/skills/multi-agent-orchestration/SKILL.md index 1661f2dea..f8c6baaab 100644 --- a/.claude/skills/multi-agent-orchestration/SKILL.md +++ b/.claude/skills/multi-agent-orchestration/SKILL.md @@ -338,6 +338,7 @@ When modifying orchestration, verify: - [ ] **INV-O7**: Worker timeouts use 10-minute default (600s for resumed sessions) - [ ] **INV-O8**: Cancellation tokens propagated to all async operations - [ ] **INV-O15**: IDLE-DEFER flushes CurrentResponse before breaking (content preservation) +- [ ] **INV-O17**: IDLE-DEFER-REARM re-arms IsProcessing when idle arrives with backgroundTasks but IsProcessing is already false (PR #472) --- @@ -763,6 +764,59 @@ These layers remain as fallbacks for edge cases where IDLE-DEFER doesn't fire: | `RecoverFromPrematureIdleIfNeededAsync` | Poll signal + events.jsonl freshness | Collect full response after partial TCS | | `IsEventsFileActive` | events.jsonl mtime < 15s | Detect ongoing CLI activity | +### IDLE-DEFER-REARM (PR #472) + +When multiple `session.idle` events arrive for a session with active `backgroundTasks`, +the first IDLE-DEFER correctly defers completion. But if `CompleteResponse` runs between +the first and second idle (e.g., triggered by a watchdog or another code path), then +`IsProcessing` is already `false` when the second idle arrives. Without re-arming, +background agents would finish their work but no completion event would fire — the +classic "zero-idle" symptom where the session appears stuck. + +**IDLE-DEFER-REARM** detects this scenario and re-arms processing: + +```csharp +case SessionIdleEvent idle: + CancelTurnEndFallback(state); + + if (HasActiveBackgroundTasks(idle)) + { + // ... existing IDLE-DEFER logic (flush, break) ... + + // REARM: If IsProcessing is already false, re-arm it so the + // next idle (without background tasks) triggers CompleteResponse. + if (!state.Info.IsProcessing) + { + state.Info.IsProcessing = true; + state.Info.HasUsedToolsThisTurn = true; // 600s tool timeout + RestartProcessingWatchdog(state); + Debug($"[IDLE-DEFER-REARM] ..."); + } + break; + } + + CompleteResponse(state, idleGeneration); +``` + +**What the re-arm sets:** +- `IsProcessing = true` — so the next non-deferred idle fires `CompleteResponse` +- `HasUsedToolsThisTurn = true` — enables the 600s tool timeout instead of 120s, + since background agents may run for several minutes +- Restarts the processing watchdog — provides a safety net if no further idle arrives +- Logs `[IDLE-DEFER-REARM]` to `event-diagnostics.log` + +### `session.idle` Is Ephemeral — Never Persisted + +> **⚠️ `session.idle` is NEVER written to `events.jsonl` by design.** It is a +> transient signal from the CLI, not a persisted event. Any restore or recovery +> logic that depends on reading `session.idle` from disk is fundamentally flawed. +> The poller in `PollEventsAndResumeWhenIdleAsync` watches for `session.shutdown` +> instead, which IS persisted. + +This matters for IDLE-DEFER because after an app restart, there will be no +`session.idle` replay — the watchdog and resume logic handle completion detection +for interrupted sessions, not idle event replay. + --- ## "Fix with Copilot" — Multi-Agent Awareness @@ -959,6 +1013,7 @@ Reflect mode runs multiple iterations. Expect this pattern: | Orchestration hangs on reconnect | Check for missing TrySetCanceled | TCS not canceled; see INV-O9 | | Many IDLE-DEFER entries | `grep "IDLE-DEFER" diagnostics.log` | Normal — worker has active sub-agents; wait for completion | | IDLE-DEFER but worker never completes | Check if background tasks are leaking | Sub-agent/shell not terminating; check CLI logs | +| IDLE-DEFER-REARM entries | `grep "IDLE-DEFER-REARM" diagnostics.log` | Normal — IsProcessing was false when deferred idle arrived; re-armed for next completion | | Worker stuck 30+ min, events.jsonl fresh | Check file size across watchdog cycles | Dead connection — file-size-growth check should catch in ~360s (INV-O16) | --- diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 2009c2c21..8116600a6 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -254,6 +254,8 @@ For multi-agent sessions, Case B also checks **file-size-growth**: if events.jso Note: `session.idle` is an ephemeral event (`ephemeral: true` in the SDK schema) — it is delivered over the live event stream but intentionally NOT written to `events.jsonl`. When `session.idle` includes active `backgroundTasks` (sub-agents, shells), the IDLE-DEFER logic defers completion until a subsequent idle arrives with empty/null backgroundTasks. In rare cases where `IsProcessing` was already cleared (by watchdog timeout or reconnect) before the deferred idle arrives, the session may appear stuck until the watchdog fires again — see issue #403. +When `session.idle` with active `backgroundTasks` arrives but `IsProcessing` is already `false`, the handler re-arms: sets `IsProcessing = true`, `HasUsedToolsThisTurn = true` (600s timeout), restarts watchdog, logs `[IDLE-DEFER-REARM]`. This prevents zero-idle symptoms where IDLE-DEFER fires once but subsequent idles are no-ops. + When the watchdog fires, it marshals state mutations to the UI thread via `InvokeOnUI()` and adds a system warning message. ### Diagnostic Log Tags @@ -262,6 +264,7 @@ The event diagnostics log (`~/.polypilot/event-diagnostics.log`) uses these tags - `[EVT]` — SDK event received (only SessionIdleEvent, AssistantTurnEndEvent, SessionErrorEvent) - `[IDLE]` — SessionIdleEvent dispatched to CompleteResponse - `[IDLE-DEFER]` — SessionIdleEvent deferred due to active background tasks (agents/shells) +- `[IDLE-DEFER-REARM]` — SessionIdleEvent re-armed IsProcessing after it was already cleared - `[COMPLETE]` — CompleteResponse executed or skipped - `[RECONNECT]` — session replaced after disconnect - `[ERROR]` — SessionErrorEvent or SendAsync/reconnect failure cleared IsProcessing @@ -288,6 +291,8 @@ When a user changes the model via the UI dropdown: - The SDK continues using the original model from session creation - To truly switch models, the session must be destroyed and recreated +`GetSessionModel` prioritizes: (1) user's explicit choice (`session.Model`), (2) backend-reported model from usage info, (3) `DefaultModel` fallback. `ShouldAcceptObservedModel()` in `ModelHelper.cs` prevents `SessionUsageInfoEvent` and `AssistantUsageEvent` from overwriting an explicit user model selection — the observed model is only accepted if no explicit choice was made or if the observed model matches the explicit choice. + ### SDK Data Types - `AssistantUsageData` properties (`InputTokens`, `OutputTokens`, etc.) are `Double?` not `int?` - Use `Convert.ToInt32(value)` for conversion, not `value as int?` @@ -302,6 +307,9 @@ Avoid `@bind:event="oninput"` — causes round-trip lag per keystroke. Use plain For detailed stuck-session debugging knowledge (8 invariants from 7 PRs of fix cycles), see `.claude/skills/processing-state-safety/SKILL.md`. +### Smart Completion Scan +**Smart completion scan:** `assistant.turn_end` and `assistant.message` are not unconditionally terminal — they appear between every tool round. `IsSessionStillProcessing` uses `IsCleanNoToolSubturn()` to scan backward from the last event within the current sub-turn (bounded by `assistant.turn_start`/`session.resume`/`session.start`). If any `tool.execution_*` event is found in the current sub-turn, the session is considered still processing. Clean no-tool turns are detected immediately, eliminating the 600s watchdog delay for simple conversations. + ### Session Persistence - Active sessions: `~/.polypilot/active-sessions.json` (includes `LastPrompt` — last user message if session was processing during save) - Session state: `~/.copilot/session-state//events.jsonl` (SDK-managed, stays in ~/.copilot)