diff --git a/.claude/skills/performance-optimization/SKILL.md b/.claude/skills/performance-optimization/SKILL.md index 7dbbab431a..a216e0f440 100644 --- a/.claude/skills/performance-optimization/SKILL.md +++ b/.claude/skills/performance-optimization/SKILL.md @@ -2,15 +2,18 @@ name: performance-optimization description: > Performance invariants and optimization knowledge for PolyPilot's render pipeline, - session switching, persistence, and caching layers. Use when: (1) Modifying - RefreshSessions, GetOrganizedSessions, or SafeRefreshAsync, (2) Touching - SaveActiveSessionsToDisk, SaveOrganization, or SaveUiState, (3) Working with - LoadPersistedSessions or session directory scanning, (4) Modifying markdown - rendering or the message cache, (5) Optimizing render cycle performance or - adding Blazor component rendering, (6) Working with debounce timers or - DisposeAsync cleanup. Covers: session-switch bottleneck fix, debounce flush - requirements, expensive operation guards, cache invalidation strategy, and - render cycle analysis. + session switching, persistence, caching layers, and startup performance. Use when: + (1) Modifying RefreshSessions, GetOrganizedSessions, or SafeRefreshAsync, + (2) Touching SaveActiveSessionsToDisk, SaveOrganization, or SaveUiState, + (3) Working with LoadPersistedSessions or session directory scanning, + (4) Modifying markdown rendering or the message cache, + (5) Optimizing render cycle performance or adding Blazor component rendering, + (6) Working with debounce timers or DisposeAsync cleanup, + (7) Debugging startup slowness or blue screen on launch, + (8) Modifying InitializeAsync or RestoreSessionsInBackgroundAsync. + Covers: session-switch bottleneck fix, debounce flush requirements, expensive + operation guards, cache invalidation strategy, render cycle analysis, and + startup performance debugging. --- # Performance Optimization @@ -99,3 +102,60 @@ item heights — chat messages have variable height from markdown rendering. ### ChatMessageItem has no ShouldRender() Always re-renders when parent calls `StateHasChanged()`. Adding a content hash comparison would skip re-renders for unchanged messages. + +## Startup Performance Debugging + +### Architecture +Startup has two phases: +1. **UI thread (blocking):** MAUI bootstrap → BlazorWebView creation → Blazor framework init → Dashboard `OnInitialized` → `InitializeAsync` → fires `RestoreSessionsInBackgroundAsync` on ThreadPool +2. **Background thread (non-blocking):** Read `active-sessions.json` → create lazy placeholders → eager-resume candidates → `ReconcileOrganization` + +The UI renders as soon as `InitializeAsync` returns. Session restore NEVER blocks the UI thread (runs via `Task.Run`). If you see a blue screen, the problem is in phase 1 (UI thread), not phase 2. + +### Instrumentation +`[STARTUP-TIMING]` log tags in `~/.polypilot/console.log`: +``` +[STARTUP-TIMING] LoadOrganization: 63ms ← reads organization.json +[STARTUP-TIMING] Pre-restore: 64ms ← time before Task.Run +[STARTUP-TIMING] Session loop complete: 35056ms ← background thread, NOT blocking UI +[STARTUP-TIMING] RestoreSessionsInBackground: 35095ms ← total background time +``` + +### How to debug startup slowness + +**Step 1: Measure the UI-visible delay** +``` +BlazorDevFlow ConfigureHandler → Dashboard Restoring UI state +``` +Find both timestamps in console.log for the current PID. The gap is the user-visible startup time. Normal: 5-8 seconds. + +**Step 2: Identify which phase is slow** +- If `[STARTUP-TIMING] Pre-restore` is high (>500ms): `LoadOrganization` or `InitializeAsync` is slow on the UI thread +- If `[STARTUP-TIMING] Session loop` is high but UI rendered fast: background restore is slow but not blocking the user — acceptable +- If neither timing tag appears: the slowdown is in Blazor framework init (before our code runs) — check system load, WebView issues + +**Step 3: A/B test against main** +```bash +# Save current branch +git stash && git checkout main +cd PolyPilot && ./relaunch.sh +# Measure: BlazorDevFlow Config → Dashboard Restore gap + +# Switch back +git checkout && git stash pop +cd PolyPilot && ./relaunch.sh +# Compare gaps +``` + +⚠️ **Use `git stash` + `git checkout` — do NOT `git checkout origin/main -- .`** (checks out files but keeps wrong branch name, confuses the app's branch display). + +**Step 4: Common false alarms** +- CPU load from concurrent test runs or builds causes 2-3x slowdown in Blazor init +- DLL file locks from running app cause build failures (retry after a few seconds) +- The first launch after a `dotnet clean` is always slower (JIT compilation) +- Run the comparison 2-3 times each to account for variance + +### Critical rules +- **NEVER block the UI thread during restore.** All session loading uses `Task.Run` + `ConfigureAwait(false)`. Violations cause blue screen. +- **`LoadPersistedSessions()` is O(N) on ALL session directories** (750+). Never call from `InitializeAsync` or any UI-triggered path. See PERF-2. +- **InvokeOnUI callbacks during restore compete with Blazor rendering.** Minimize them. The restore path should batch state changes and call `NotifyStateChanged` once at the end, not per-session. diff --git a/.claude/skills/processing-state-safety/SKILL.md b/.claude/skills/processing-state-safety/SKILL.md index 73b93e47cd..f84f5de4ed 100644 --- a/.claude/skills/processing-state-safety/SKILL.md +++ b/.claude/skills/processing-state-safety/SKILL.md @@ -8,11 +8,12 @@ description: > SDK event handlers, (4) Debugging stuck sessions showing "Thinking..." forever or spinner stuck, (5) Modifying IsResumed, HasUsedToolsThisTurn, or ActiveToolCallCount, (6) Adding diagnostic log tags, (7) Modifying session restore paths - (RestoreSingleSessionAsync) that must initialize watchdog-dependent state, + (RestorePreviousSessionsAsync / EnsureSessionConnectedAsync) that must initialize + watchdog-dependent state, (8) Modifying ReconcileOrganization or any code that reads Organization.Sessions during the IsRestoring window, (9) Session appears hung or unresponsive after tool use. Covers: 18 invariants from 13 PRs of fix cycles, - the 16 code paths that set/clear IsProcessing, and common regression patterns. + the 21+ code paths that set/clear IsProcessing, and common regression patterns. --- # Processing State Safety @@ -151,9 +152,9 @@ cross-thread fields without a tracking comment explaining the gap. causing stale renders. ### INV-9: Session restore must initialize all watchdog-dependent state -The restore path (`RestoreSingleSessionAsync`) is separate from `SendPromptAsync`. -Any field that affects watchdog timeout selection or dispatch routing must be -initialized in BOTH paths: +The restore path (`RestorePreviousSessionsAsync` + `EnsureSessionConnectedAsync`) is +separate from `SendPromptAsync`. Any field that affects watchdog timeout selection or +dispatch routing must be initialized in BOTH paths: - `IsMultiAgentSession` — set via `IsSessionInMultiAgentGroup()` before `StartProcessingWatchdog` - `HasReceivedEventsSinceResume` / `HasUsedToolsThisTurn` — set via `GetEventsFileRestoreHints()` - `IsResumed` — set on the `AgentSessionInfo` when `isStillProcessing` is true @@ -315,10 +316,10 @@ complete the response while sub-agents are still working. 5. **Missing state initialization on session restore** — `IsMultiAgentSession`, `IsResumed`, and other flags must be set on restored sessions BEFORE `StartProcessingWatchdog` is called. The restore path in - `RestoreSingleSessionAsync` is separate from `SendPromptAsync` and must - independently initialize all state the watchdog depends on. PR #284 fixed - `IsMultiAgentSession` not being set during restore, causing the watchdog - to use 120s instead of 600s for multi-agent workers. + `RestorePreviousSessionsAsync` / `EnsureSessionConnectedAsync` is separate + from `SendPromptAsync` and must independently initialize all state the watchdog + depends on. PR #284 fixed `IsMultiAgentSession` not being set during restore, + causing the watchdog to use 120s instead of 600s for multi-agent workers. **Retired mistake (was #2):** *ActiveToolCallCount as sole tool signal* — still relevant per INV-5, but the more impactful version is #2 above (suppressing the fallback entirely). @@ -345,15 +346,26 @@ When a session shows "Thinking..." indefinitely: | Symptom | Likely Cause | Fix | |---------|-------------|-----| | `[SEND]` then silence | SDK never responded, watchdog will catch at 120s | Wait or abort | - | `[EVT] TurnEnd` but no `[IDLE]` | Zero-idle SDK bug | Watchdog catches at 30s fallback (INV-10) | - | `[IDLE-DEFER]` then long silence | Background tasks (sub-agents/shells) active but never completed | Check agent status; watchdog will eventually catch (INV-18) | + | `[EVT] TurnEnd` but no `[IDLE]` | `session.idle` is ephemeral (never on disk). If live events stopped: IDLE-DEFER deferred the idle and `IsProcessing` was cleared before the follow-up arrived (#403) | Watchdog catches; #403 fix re-arms IsProcessing | + | `[IDLE-DEFER]` then long silence | Background tasks (sub-agents/shells) active | Check `[IDLE-DIAG]` for backgroundTasks count; watchdog will eventually catch (INV-18) | + | `[IDLE-DEFER]` with `IsProcessing=False` | IDLE-DEFER fired but IsProcessing was already cleared by watchdog/reconnect | #403 IDLE-DEFER-REARM fix re-arms IsProcessing | | `[COMPLETE]` fired but spinner persists | UI thread not notified | Check INV-2, INV-8 | | `[WATCHDOG]` clears but re-sticks | New turn started before watchdog callback ran | Check INV-3 generation guard | + | After relaunch: session shows "Working" for 600s | Session was active on CLI during relaunch; poll-then-resume waiting for `session.shutdown` (only disk-persisted terminal event) | Normal — watchdog clears at 600s. `session.idle` is ephemeral, never on disk | 5. **Nuclear option** — user clicks Stop (AbortSessionAsync, path #5/#6). +## Key Facts About session.idle + +- `session.idle` is **`ephemeral: true`** in the SDK schema — intentionally NOT written to events.jsonl +- Events written to disk: `session.start`, `session.resume`, `session.shutdown` — NOT `session.idle` +- PolyPilot receives `SessionIdleEvent` over the live SDK event stream +- CLI correctly populates `backgroundTasks` field (proven with `[IDLE-DIAG]` instrumentation) +- When `backgroundTasks` is active, IDLE-DEFER defers completion until a subsequent idle with empty backgroundTasks +- After app relaunch, the poll-then-resume pattern watches events.jsonl for `session.shutdown` only (the only terminal event on disk) + ## Regression History 10 PRs of fix/regression cycles: #141 → #147 → #148 → #153 → #158 → #163 → #164 → #276 → #284 → #332. -Additional safety PRs: #373 (orphaned state guards), #375 (premature idle re-arm), #399 (IDLE-DEFER for background tasks). +Additional safety PRs: #373 (orphaned state guards), #375 (premature idle re-arm), #399 (IDLE-DEFER for background tasks), #472 (poll-then-resume + IDLE-DEFER-REARM + model selection). See `references/regression-history.md` for the full timeline with root causes. diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 1d8a0c0cf7..2009c2c21d 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -239,7 +239,7 @@ The UI shows: "Sending…" → "Server connected…" → "Thinking…" → "Work **CRITICAL**: Every code path that sets `IsProcessing = false` must clear 9 companion fields and call `FlushCurrentResponse`. This is the most recurring bug category (13 PRs of fix/regression cycles). **Read `.claude/skills/processing-state-safety/SKILL.md` before modifying ANY processing path.** There are 15+ such paths across CopilotService.cs, Events.cs, Bridge.cs, Organization.cs, and Providers.cs. ### Content Persistence -`FlushCurrentResponse` is also called on `AssistantTurnEndEvent` to persist accumulated response text at each sub-turn boundary. This prevents content loss if the app restarts between `turn_end` and `session.idle` (e.g., "zero-idle sessions" where the SDK never emits `session.idle`). The flush includes a dedup guard to prevent duplicate messages from event replay on resume. +`FlushCurrentResponse` is also called on `AssistantTurnEndEvent` to persist accumulated response text at each sub-turn boundary. This prevents content loss if the app restarts between `turn_end` and `session.idle`. When the IDLE-DEFER logic defers `session.idle` (active background tasks), the flush ensures content from the foreground turn is saved. The flush includes a dedup guard to prevent duplicate messages from event replay on resume. ### Processing Watchdog The processing watchdog (`RunProcessingWatchdogAsync` in `CopilotService.Events.cs`) detects stuck sessions by checking how long since the last SDK event. It checks every 15 seconds and has three timeout tiers: @@ -252,7 +252,7 @@ The processing watchdog (`RunProcessingWatchdogAsync` in `CopilotService.Events. For multi-agent sessions, Case B also checks **file-size-growth**: if events.jsonl hasn't grown for `WatchdogCaseBMaxStaleChecks` (2) consecutive deferrals, the session is force-completed — the connection is dead. This catches `ConnectionLostException` scenarios where mtime stays fresh but no new data arrives, reducing detection from 30+ min to ~360s (3 cycles: 1 baseline + 2 stale checks). The 1800s freshness window is preserved. -Note: Some sessions never receive `session.idle` events (SDK/CLI bug). In these "zero-idle" cases, `IsProcessing` is only cleared by the watchdog or user abort. The turn_end flush (see Content Persistence above) ensures response content is not lost. +Note: `session.idle` is an ephemeral event (`ephemeral: true` in the SDK schema) — it is delivered over the live event stream but intentionally NOT written to `events.jsonl`. When `session.idle` includes active `backgroundTasks` (sub-agents, shells), the IDLE-DEFER logic defers completion until a subsequent idle arrives with empty/null backgroundTasks. In rare cases where `IsProcessing` was already cleared (by watchdog timeout or reconnect) before the deferred idle arrives, the session may appear stuck until the watchdog fires again — see issue #403. When the watchdog fires, it marshals state mutations to the UI thread via `InvokeOnUI()` and adds a system warning message. diff --git a/PolyPilot.Tests/BackgroundTasksIdleTests.cs b/PolyPilot.Tests/BackgroundTasksIdleTests.cs index 96f4869d61..7193eaae80 100644 --- a/PolyPilot.Tests/BackgroundTasksIdleTests.cs +++ b/PolyPilot.Tests/BackgroundTasksIdleTests.cs @@ -1,4 +1,5 @@ using GitHub.Copilot.SDK; +using PolyPilot.Models; using PolyPilot.Services; namespace PolyPilot.Tests; @@ -110,4 +111,111 @@ public void HasActiveBackgroundTasks_DataNull_ReturnsFalse() var idle = new SessionIdleEvent { Data = null! }; Assert.False(CopilotService.HasActiveBackgroundTasks(idle)); } + + // --- IDLE-DEFER-REARM tests (issue #403) --- + // These test the expected re-arm conditions when session.idle arrives + // with active background tasks but IsProcessing is already false. + + [Fact] + public void IdleDeferRearm_ShouldRearm_WhenBackgroundTasksActiveAndNotProcessing() + { + // Scenario: watchdog or reconnect cleared IsProcessing, + // then session.idle arrives with active background tasks. + // Expected: should re-arm IsProcessing=true. + var info = new AgentSessionInfo { Name = "test", Model = "test-model" }; + info.IsProcessing = false; // Cleared by watchdog + + var idle = MakeIdle(new SessionIdleDataBackgroundTasks + { + Agents = new[] + { + new SessionIdleDataBackgroundTasksAgentsItem + { + AgentId = "agent-1", AgentType = "copilot", Description = "worker" + } + }, + Shells = Array.Empty() + }); + + // Verify preconditions + Assert.False(info.IsProcessing); + Assert.True(CopilotService.HasActiveBackgroundTasks(idle)); + + // The re-arm logic: if !IsProcessing && HasActiveBackgroundTasks && !WasUserAborted → re-arm + bool shouldRearm = !info.IsProcessing && CopilotService.HasActiveBackgroundTasks(idle); + Assert.True(shouldRearm, "Should re-arm when background tasks active and not processing"); + + // Simulate re-arm + info.IsProcessing = true; + info.ProcessingPhase = 3; // Working (background tasks) + info.ProcessingStartedAt ??= DateTime.UtcNow; + + Assert.True(info.IsProcessing); + Assert.Equal(3, info.ProcessingPhase); + Assert.NotNull(info.ProcessingStartedAt); + } + + [Fact] + public void IdleDeferRearm_ShouldNotRearm_WhenNoBackgroundTasks() + { + // Scenario: session.idle arrives with no background tasks and IsProcessing=false. + // Expected: should NOT re-arm — this is a normal completion. + var info = new AgentSessionInfo { Name = "test", Model = "test-model" }; + info.IsProcessing = false; + + var idle = MakeIdle(bt: null); + + bool shouldRearm = !info.IsProcessing && CopilotService.HasActiveBackgroundTasks(idle); + Assert.False(shouldRearm, "Should NOT re-arm when no background tasks"); + } + + [Fact] + public void IdleDeferRearm_ShouldNotRearm_WhenAlreadyProcessing() + { + // Scenario: session.idle with background tasks but IsProcessing is already true. + // Expected: should NOT re-arm (already armed) — just defer. + var info = new AgentSessionInfo { Name = "test", Model = "test-model" }; + info.IsProcessing = true; // Already processing + + var idle = MakeIdle(new SessionIdleDataBackgroundTasks + { + Agents = new[] + { + new SessionIdleDataBackgroundTasksAgentsItem + { + AgentId = "a1", AgentType = "copilot", Description = "" + } + }, + Shells = Array.Empty() + }); + + bool shouldRearm = !info.IsProcessing && CopilotService.HasActiveBackgroundTasks(idle); + Assert.False(shouldRearm, "Should NOT re-arm when already processing"); + } + + [Fact] + public void IdleDeferRearm_ShouldNotRearm_WhenUserAborted() + { + // Scenario: user clicked Stop, then session.idle with background tasks arrives. + // Expected: should NOT re-arm — user explicitly stopped. + var info = new AgentSessionInfo { Name = "test", Model = "test-model" }; + info.IsProcessing = false; + bool wasUserAborted = true; + + var idle = MakeIdle(new SessionIdleDataBackgroundTasks + { + Agents = new[] + { + new SessionIdleDataBackgroundTasksAgentsItem + { + AgentId = "a1", AgentType = "copilot", Description = "" + } + }, + Shells = Array.Empty() + }); + + // The full re-arm condition includes WasUserAborted check + bool shouldRearm = !info.IsProcessing && CopilotService.HasActiveBackgroundTasks(idle) && !wasUserAborted; + Assert.False(shouldRearm, "Should NOT re-arm when user aborted"); + } } diff --git a/PolyPilot.Tests/EventsJsonlParsingTests.cs b/PolyPilot.Tests/EventsJsonlParsingTests.cs index ed96e80012..077b630eed 100644 --- a/PolyPilot.Tests/EventsJsonlParsingTests.cs +++ b/PolyPilot.Tests/EventsJsonlParsingTests.cs @@ -87,23 +87,27 @@ public void TitleCleaning_RemovesNewlines() [Fact] public void IsSessionStillProcessing_ActiveEventTypes() { + // Terminal events are the only ones that indicate processing is complete. + // Everything else (including intermediate events like assistant.turn_end, + // assistant.message, tool.execution_end) means the session is still active. + var terminalEvents = new[] { "session.idle", "session.error", "session.shutdown" }; + + // These should indicate the session is still processing (not terminal) var activeEvents = new[] { "assistant.turn_start", "tool.execution_start", "tool.execution_progress", "assistant.message_delta", "assistant.reasoning", "assistant.reasoning_delta", - "assistant.intent" + "assistant.intent", "assistant.turn_end", + "assistant.message", "session.start" }; - - // These should indicate the session is still processing foreach (var eventType in activeEvents) { - Assert.Contains(eventType, activeEvents); + Assert.DoesNotContain(eventType, terminalEvents); } - // These should NOT indicate processing - var inactiveEvents = new[] { "session.idle", "assistant.message", "session.start" }; - foreach (var eventType in inactiveEvents) + // These SHOULD indicate processing is complete (terminal) — must not appear in activeEvents + foreach (var eventType in terminalEvents) { Assert.DoesNotContain(eventType, activeEvents); } diff --git a/PolyPilot.Tests/StateChangeCoalescerTests.cs b/PolyPilot.Tests/StateChangeCoalescerTests.cs index 5f41e44e54..64185598a6 100644 --- a/PolyPilot.Tests/StateChangeCoalescerTests.cs +++ b/PolyPilot.Tests/StateChangeCoalescerTests.cs @@ -60,7 +60,7 @@ public async Task SingleCall_FiresExactlyOnce() svc.NotifyStateChangedCoalesced(); // Wait well beyond the coalesce window (150ms) to ensure the timer has fired, // even under heavy CI load. Single call can only ever produce exactly 1 fire. - await Task.Delay(600); + await Task.Delay(1200); Assert.Equal(1, fireCount); } diff --git a/PolyPilot.Tests/StuckSessionRecoveryTests.cs b/PolyPilot.Tests/StuckSessionRecoveryTests.cs index bd82d41907..805f62d011 100644 --- a/PolyPilot.Tests/StuckSessionRecoveryTests.cs +++ b/PolyPilot.Tests/StuckSessionRecoveryTests.cs @@ -303,6 +303,9 @@ public void StalenessThreshold_UsesWatchdogToolExecutionTimeout() [InlineData("assistant.reasoning")] [InlineData("assistant.reasoning_delta")] [InlineData("assistant.intent")] + [InlineData("assistant.turn_end")] // between tool rounds — still processing + [InlineData("assistant.message")] // mid-turn text — still processing + [InlineData("tool.execution_end")] // tool just completed — still processing public void IsSessionStillProcessing_AllActiveEventTypes_ReturnTrue(string eventType) { var svc = CreateService(); @@ -326,11 +329,10 @@ public void IsSessionStillProcessing_AllActiveEventTypes_ReturnTrue(string event [Theory] [InlineData("session.idle")] - [InlineData("assistant.message")] - [InlineData("session.start")] - [InlineData("assistant.turn_end")] - [InlineData("tool.execution_end")] - public void IsSessionStillProcessing_InactiveEventTypes_ReturnFalse(string eventType) + [InlineData("session.error")] + [InlineData("session.shutdown")] + [InlineData("session.start")] // created but never used — not actively processing + public void IsSessionStillProcessing_TerminalEventTypes_ReturnFalse(string eventType) { var svc = CreateService(); var tmpDir = Path.Combine(Path.GetTempPath(), "polypilot-test-" + Guid.NewGuid().ToString("N")); @@ -343,7 +345,7 @@ public void IsSessionStillProcessing_InactiveEventTypes_ReturnFalse(string event { File.WriteAllText(eventsFile, $$$"""{"type":"{{{eventType}}}","data":{}}"""); var result = svc.IsSessionStillProcessing(sessionId, tmpDir); - Assert.False(result, $"Inactive event type '{eventType}' should not report still processing"); + Assert.False(result, $"Terminal event type '{eventType}' should not report still processing"); } finally { diff --git a/PolyPilot.Tests/WsBridgeIntegrationTests.cs b/PolyPilot.Tests/WsBridgeIntegrationTests.cs index 1090d6b88d..3b84c7ea1c 100644 --- a/PolyPilot.Tests/WsBridgeIntegrationTests.cs +++ b/PolyPilot.Tests/WsBridgeIntegrationTests.cs @@ -683,12 +683,12 @@ public async Task RenameSession_ViaClient_RenamesOnServer() await InitDemoMode(); await _copilot.CreateSessionAsync("old-name", "gpt-4.1"); - using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(15)); + using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30)); var client = await ConnectClientAsync(cts.Token); await WaitForAsync(() => client.Sessions.Any(s => s.Name == "old-name"), cts.Token); await client.RenameSessionAsync("old-name", "new-name", cts.Token); - await WaitForAsync(() => _copilot.GetSession("new-name") != null, cts.Token, maxMs: 12000); + await WaitForAsync(() => _copilot.GetSession("new-name") != null, cts.Token, maxMs: 25000); Assert.Null(_copilot.GetSession("old-name")); Assert.NotNull(_copilot.GetSession("new-name")); diff --git a/PolyPilot/Components/Pages/Dashboard.razor b/PolyPilot/Components/Pages/Dashboard.razor index 62e0b46095..561fda5b8a 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor +++ b/PolyPilot/Components/Pages/Dashboard.razor @@ -3357,15 +3357,16 @@ private string GetSessionModel(AgentSessionInfo session) { - // Priority: 1) Usage info (TRUTH) - what the backend is actually using - if (usageBySession.TryGetValue(session.Name, out var u) && !string.IsNullOrEmpty(u.Model)) - return u.Model; - - // Priority: 2) Session model property (initial configuration) + // Priority: 1) Session model property (user's explicit choice, persisted across restarts) if (!string.IsNullOrEmpty(session.Model) && session.Model != "resumed") return session.Model; - return availableModels[0]; + // Priority: 2) Usage info - what the backend reported + if (usageBySession.TryGetValue(session.Name, out var u) && !string.IsNullOrEmpty(u.Model)) + return u.Model; + + // Fallback: default model (never alphabetical first which may be haiku) + return CopilotService.DefaultModel; } private string GetExpandedModel(AgentSessionInfo session) diff --git a/PolyPilot/Models/ModelHelper.cs b/PolyPilot/Models/ModelHelper.cs index 0734ce893a..30b3b65a1f 100644 --- a/PolyPilot/Models/ModelHelper.cs +++ b/PolyPilot/Models/ModelHelper.cs @@ -13,6 +13,7 @@ public static class ModelHelper public static IReadOnlyList FallbackModels { get; } = new[] { "claude-opus-4.6", + "claude-opus-4.6-1m", "claude-opus-4.6-fast", "claude-opus-4.5", "claude-sonnet-4.5", @@ -89,6 +90,30 @@ public static bool IsDisplayName(string? model) return model.Any(char.IsUpper) || model.Contains(' '); } + /// + /// Converts a model slug to a human-friendly display name. + /// If displayNames dictionary is provided and contains the slug, uses the SDK's display name. + /// Otherwise falls back to algorithmic prettification. + /// + public static string PrettifyModel(string modelId, IReadOnlyDictionary? displayNames = null) + { + if (string.IsNullOrEmpty(modelId)) return modelId; + + // If we have an SDK-provided display name, prefer it + if (displayNames != null && displayNames.TryGetValue(modelId, out var sdkName)) + return sdkName; + + // Algorithmic fallback + var display = modelId + .Replace("claude-", "Claude ") + .Replace("gpt-", "GPT-") + .Replace("gemini-", "Gemini "); + display = display.Replace("-", " "); + display = display.Replace("GPT ", "GPT-"); + return string.Join(' ', display.Split(' ', StringSplitOptions.RemoveEmptyEntries).Select(s => + s.Length > 0 ? char.ToUpper(s[0]) + s[1..] : s)); + } + internal static string? ExtractLatestModelFromEvents(IEnumerable lines) { string? latestModel = null; diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs index 295684bb33..118a575fcc 100644 --- a/PolyPilot/Services/CopilotService.Events.cs +++ b/PolyPilot/Services/CopilotService.Events.cs @@ -644,6 +644,14 @@ void Invoke(Action action) // Cancel the TurnEnd→Idle fallback — normal SessionIdleEvent arrived CancelTurnEndFallback(state); + // Diagnostic: dump raw backgroundTasks payload to prove whether CLI populates it consistently + { + var bt = idle.Data?.BackgroundTasks; + var agentCount = bt?.Agents?.Length ?? -1; + var shellCount = bt?.Shells?.Length ?? -1; + Debug($"[IDLE-DIAG] '{sessionName}' session.idle payload: backgroundTasks={{agents={agentCount}, shells={shellCount}, null={bt == null}}}"); + } + // KEY FIX: Check if the server reports active background tasks (sub-agents, shells). // session.idle with background tasks means "foreground quiesced, background still running." // Do NOT treat this as terminal — flush text and wait for the real idle. @@ -658,6 +666,23 @@ void Invoke(Action action) { if (state.IsOrphaned) return; FlushCurrentResponse(state); + + // FIX #403: If IsProcessing was already cleared (by watchdog timeout, + // reconnect, or prior EVT-REARM cycle), re-arm it. Without this, the + // session appears done but background tasks are still running — the + // orchestrator collects a truncated/empty response. + // Guards mirror EVT-REARM: skip if orphaned or user-aborted. + if (!state.Info.IsProcessing && !state.WasUserAborted) + { + Debug($"[IDLE-DEFER-REARM] '{sessionName}' re-arming IsProcessing — background tasks active but processing was cleared"); + state.Info.IsProcessing = true; + state.Info.ProcessingPhase = 3; // Working (background tasks) + state.Info.ProcessingStartedAt ??= DateTime.UtcNow; + state.HasUsedToolsThisTurn = true; // 600s timeout (background tasks can run long) + state.IsMultiAgentSession = IsSessionInMultiAgentGroup(sessionName); + StartProcessingWatchdog(state, sessionName); + } + NotifyStateChangedCoalesced(); }); break; // Don't complete — wait for next idle without background tasks @@ -710,8 +735,18 @@ await notifService.SendNotificationAsync( if (!string.IsNullOrEmpty(startModel)) { var normalizedStartModel = Models.ModelHelper.NormalizeToSlug(startModel); - state.Info.Model = normalizedStartModel; - Debug($"Session model from start event: {startModel} → {normalizedStartModel}"); + // Only update if the user hasn't already set a model — the CLI may + // report a default model (e.g. haiku) after abort/resume that overrides + // the user's explicit choice. + if (string.IsNullOrEmpty(state.Info.Model) || state.Info.Model == "resumed") + { + state.Info.Model = normalizedStartModel; + Debug($"Session model from start event: {startModel} → {normalizedStartModel}"); + } + else if (normalizedStartModel != state.Info.Model) + { + Debug($"Session model from start event ignored: {startModel} → {normalizedStartModel} (keeping user choice: {state.Info.Model})"); + } } Invoke(() => { if (!IsRestoring) SaveActiveSessionsToDisk(); }); break; diff --git a/PolyPilot/Services/CopilotService.Persistence.cs b/PolyPilot/Services/CopilotService.Persistence.cs index fbf7d825f7..820c14f7ab 100644 --- a/PolyPilot/Services/CopilotService.Persistence.cs +++ b/PolyPilot/Services/CopilotService.Persistence.cs @@ -530,6 +530,7 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok if (entries != null && entries.Count > 0) { Debug($"Restoring {entries.Count} previous sessions..."); + var restoreSw = System.Diagnostics.Stopwatch.StartNew(); IsRestoring = true; // Snapshot groups once for thread safety — bridge/SDK events can @@ -646,16 +647,49 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok _sessions[entry.DisplayName] = lazyState; _activeSessionName ??= entry.DisplayName; RestoreUsageStats(entry); - // Eagerly resume sessions that are still actively processing on the - // headless server. Check events.jsonl (authoritative) first, then fall - // back to LastPrompt (saved when IsProcessing=true at debounce time). - // Without this, actively-running sessions appear idle after app restart - // because they're only loaded as lazy placeholders with no SDK connection. + // Check if session is still actively processing on the headless server. var isStillActive = IsSessionStillProcessing(entry.SessionId); - if (isStillActive || !string.IsNullOrWhiteSpace(entry.LastPrompt)) + if (isStillActive) { + // Session is actively running on the copilot server (tool calls in + // flight). Do NOT eager-resume — ResumeSessionAsync kills in-flight + // tool execution on the CLI (the resume replaces the event stream and + // abandons running tools). Instead, mark as processing locally and + // start a file-based poller that watches events.jsonl for completion. + // When the CLI finishes, the poller triggers a lazy-resume to pick up + // the results. + Debug($"Deferring resume for actively-processing session: {entry.DisplayName} (will poll events.jsonl)"); + var capturedState = lazyState; + var capturedName = entry.DisplayName; + var capturedSessionId = entry.SessionId; + InvokeOnUI(() => + { + capturedState.Info.IsProcessing = true; + capturedState.Info.IsResumed = true; + capturedState.HasUsedToolsThisTurn = true; + // INV-9: Set IsMultiAgentSession so the watchdog uses the correct + // timeout tier (600s for multi-agent workers, not 120s). + capturedState.IsMultiAgentSession = IsSessionInMultiAgentGroup(capturedName); + capturedState.Info.ProcessingPhase = 3; // Working + capturedState.Info.ProcessingStartedAt = DateTime.UtcNow; + // Reset LastUpdatedAt so the UI doesn't show stale "Xm ago" from + // a previous app instance. Without this, sessions show "494m ago" + // because LastUpdatedAt is only updated by SDK events (which don't + // arrive during the poll-then-resume window). + capturedState.Info.LastUpdatedAt = DateTime.Now; + StartProcessingWatchdog(capturedState, capturedName); + NotifyStateChanged(); + }); + // Poll events.jsonl — when the CLI finishes (session.idle/shutdown), + // trigger a lazy resume to connect and pick up the response. + _ = PollEventsAndResumeWhenIdleAsync(capturedName, capturedState, capturedSessionId, cancellationToken); + } + else if (!string.IsNullOrWhiteSpace(entry.LastPrompt)) + { + // Session had a pending prompt but CLI is no longer active — + // safe to eager-resume to retry the prompt. eagerResumeCandidates.Add((entry.DisplayName, lazyState)); - Debug($"Queued eager resume for interrupted session: {entry.DisplayName} (active={isStillActive}, hasLastPrompt={!string.IsNullOrWhiteSpace(entry.LastPrompt)})"); + Debug($"Queued eager resume for interrupted session: {entry.DisplayName} (hasLastPrompt=true)"); } Debug($"Loaded session placeholder: {entry.DisplayName} ({lazyHistory.Count} messages)"); } @@ -786,6 +820,7 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok }, cancellationToken); } + Debug($"[STARTUP-TIMING] Session loop complete: {restoreSw.ElapsedMilliseconds}ms ({entries.Count} sessions)"); IsRestoring = false; } } @@ -797,6 +832,148 @@ public async Task RestorePreviousSessionsAsync(CancellationToken cancellationTok } + /// + /// Polls events.jsonl for a session that's actively processing on the CLI. + /// When the CLI finishes (session.idle or session.shutdown appears, or the file + /// goes stale), triggers a lazy-resume to connect and load the response. + /// + /// IMPORTANT: We cannot call ResumeSessionAsync while the CLI is running tools — + /// the resume command kills in-flight tool execution. This poller bridges the gap + /// by waiting for the CLI to finish before connecting. + /// + private async Task PollEventsAndResumeWhenIdleAsync( + string sessionName, SessionState state, string sessionId, CancellationToken ct) + { + var eventsFile = Path.Combine(SessionStatePath, sessionId, "events.jsonl"); + var maxPollTime = TimeSpan.FromMinutes(30); + var pollInterval = TimeSpan.FromSeconds(5); + var started = DateTime.UtcNow; + + Debug($"[POLL] Starting events.jsonl poll for '{sessionName}' (id={sessionId})"); + + try + { + while (!ct.IsCancellationRequested && (DateTime.UtcNow - started) < maxPollTime) + { + await Task.Delay(pollInterval, ct); + + // Check if the user already interacted (sent a message) which would have + // triggered EnsureSessionConnectedAsync — no need to poll anymore. + if (state.Session != null) + { + Debug($"[POLL] '{sessionName}' already connected (user interaction) — stopping poll"); + return; + } + + // Check if the watchdog already cleared IsProcessing — session is done. + if (!state.Info.IsProcessing) + { + Debug($"[POLL] '{sessionName}' no longer processing (watchdog cleared) — stopping poll"); + return; + } + + // Read last event type from events.jsonl for terminal events. + // NOTE: session.idle is ephemeral (never written to events.jsonl by design). + // session.error is also not persisted. Only session.shutdown is reliably on disk. + // The watchdog is the primary completion detection for disconnected sessions. + var lastEventType = GetLastEventType(eventsFile); + if (lastEventType == null) continue; + + var isTerminal = lastEventType is "session.shutdown"; + + if (isTerminal) + { + Debug($"[POLL] '{sessionName}' CLI finished (lastEvent={lastEventType}) — resuming session"); + + // INV-3/INV-12: Capture generation BEFORE async operations to prevent + // stale poller from corrupting a new turn started by user interaction. + var pollerGen = Interlocked.Read(ref state.ProcessingGeneration); + + // Load the full history from events.jsonl (includes response content). + // Merge happens inside InvokeOnUI because History is an ObservableCollection + // and all other mutations run on the UI thread. + List? diskHistory = null; + try + { + (diskHistory, _) = await LoadBestHistoryAsync(sessionId); + } + catch (Exception ex) + { + Debug($"[POLL] '{sessionName}' failed to load disk history: {ex.Message}"); + } + + // Now safe to resume — the CLI is idle, no tools to interrupt. + // Only connect if user hasn't already connected via SendPromptAsync + if (state.Session == null) + { + try + { + await EnsureSessionConnectedAsync(sessionName, state, ct); + Debug($"[POLL] '{sessionName}' lazy-resume complete after poll"); + } + catch (Exception ex) + { + Debug($"[POLL] '{sessionName}' lazy-resume failed: {ex.Message}"); + } + } + + // Complete the response — the session is done. + InvokeOnUI(() => + { + // INV-3: Generation guard — if user sent a new message during + // the poll→resume window, this completion belongs to the old turn. + if (Interlocked.Read(ref state.ProcessingGeneration) != pollerGen) return; + if (!state.Info.IsProcessing) return; // watchdog already cleared + + // Merge disk history on UI thread (History is ObservableCollection) + if (diskHistory != null && diskHistory.Count > state.Info.History.Count) + { + var existingCount = state.Info.History.Count; + foreach (var msg in diskHistory.Skip(existingCount)) + state.Info.History.Add(msg); + Debug($"[POLL] '{sessionName}' loaded {diskHistory.Count - existingCount} new messages from disk (total={state.Info.History.Count})"); + } + FlushCurrentResponse(state); + CompleteResponse(state, pollerGen); + NotifyStateChanged(); + }); + return; + } + } + + if ((DateTime.UtcNow - started) >= maxPollTime) + { + Debug($"[POLL-TIMEOUT] '{sessionName}' poll timed out after {maxPollTime.TotalMinutes:F0} minutes — cleaning up"); + } + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + Debug($"[POLL] '{sessionName}' poll cancelled"); + } + catch (Exception ex) + { + Debug($"[POLL] '{sessionName}' poll error: {ex.Message}"); + } + finally + { + // Safety net: if IsProcessing is still true and generation hasn't changed, + // clear it so the session doesn't appear stuck indefinitely. + var finalGen = Interlocked.Read(ref state.ProcessingGeneration); + if (state.Info.IsProcessing && state.Session == null) + { + InvokeOnUI(() => + { + if (Interlocked.Read(ref state.ProcessingGeneration) != finalGen) return; + if (!state.Info.IsProcessing) return; + Debug($"[POLL-CLEANUP] '{sessionName}' clearing stuck IsProcessing after poll exit"); + FlushCurrentResponse(state); + CompleteResponse(state, finalGen); + NotifyStateChanged(); + }); + } + } + } + public void SaveUiState(string currentPage, string? activeSession = null, int? fontSize = null, string? selectedModel = null, bool? expandedGrid = null, string? expandedSession = "<>", Dictionary? inputModes = null, int? gridColumns = null, int? cardMinHeight = null) { try diff --git a/PolyPilot/Services/CopilotService.Utilities.cs b/PolyPilot/Services/CopilotService.Utilities.cs index 61d0eabcd2..a68986e010 100644 --- a/PolyPilot/Services/CopilotService.Utilities.cs +++ b/PolyPilot/Services/CopilotService.Utilities.cs @@ -134,14 +134,17 @@ internal bool IsSessionStillProcessing(string sessionId, string basePath) using var doc = JsonDocument.Parse(lastLine); var type = doc.RootElement.GetProperty("type").GetString(); - - var activeEvents = new[] { - "assistant.turn_start", "tool.execution_start", - "tool.execution_progress", "assistant.message_delta", - "assistant.reasoning", "assistant.reasoning_delta", - "assistant.intent" - }; - return activeEvents.Contains(type); + if (type == null) return false; // Corrupt/partial event — treat as terminal + + // Use a blacklist of terminal events rather than a whitelist of active ones. + // Any event that is NOT terminal means the session is still processing. + // The old whitelist missed intermediate states like assistant.turn_end (between + // tool rounds), assistant.message, and tool.execution_complete, causing + // actively-processing sessions to be incorrectly detected as idle on restore. + // session.idle is ephemeral (never on disk). session.start means session was + // created but never used — not actively processing. All are non-active states. + var terminalEvents = new[] { "session.idle", "session.error", "session.shutdown", "session.start" }; + return !terminalEvents.Contains(type); } catch { return false; } } @@ -794,15 +797,29 @@ private async Task FetchAvailableModelsAsync() var modelList = await _client.ListModelsAsync(); if (modelList != null && modelList.Count > 0) { - var models = modelList - .Where(m => !string.IsNullOrEmpty(m.Id)) - .Select(m => m.Id!) - .OrderBy(m => m) - .ToList(); + // Use Id (slug) as the canonical model identifier, not Name (display name). + // Name is a display string like "Claude Opus 4.6 (1M Context)(Internal Only)" + // which NormalizeToSlug can't reliably round-trip. Id is the SDK slug. + var displayNames = new Dictionary(StringComparer.OrdinalIgnoreCase); + var models = new List(); + foreach (var m in modelList) + { + var id = m.Id; + var name = m.Name; + // Prefer Id (slug); fall back to Name if Id is missing + var key = !string.IsNullOrEmpty(id) ? id : name; + if (string.IsNullOrEmpty(key)) continue; + if (!models.Contains(key)) + models.Add(key); + if (!string.IsNullOrEmpty(name) && !string.IsNullOrEmpty(id)) + displayNames[id] = name; + } + models.Sort(StringComparer.OrdinalIgnoreCase); if (models.Count > 0) { _localAvailableModels = models; - Debug($"Loaded {models.Count} models from SDK"); + ModelDisplayNames = displayNames; + Debug($"Loaded {models.Count} models from SDK (ids)"); OnStateChanged?.Invoke(); } } diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs index 8015a065ed..061b753f7f 100644 --- a/PolyPilot/Services/CopilotService.cs +++ b/PolyPilot/Services/CopilotService.cs @@ -264,6 +264,11 @@ private static string FindProjectDir() ? _bridgeClient.AvailableModels : _localAvailableModels; private List _localAvailableModels = new(); + /// + /// Maps model slug (Id) → display name from the SDK's ListModelsAsync. + /// Used for richer display when the algorithmic prettification isn't sufficient. + /// + public Dictionary ModelDisplayNames { get; private set; } = new(); private readonly RepoManager _repoManager; private readonly CodespaceService _codespaceService; @@ -1088,7 +1093,9 @@ public async Task InitializeAsync(CancellationToken cancellationToken = default) _ = CheckAuthStatusAsync(); // Load organization state FIRST (groups, pinning, sorting) so reconcile during restore doesn't wipe it + var startupSw = System.Diagnostics.Stopwatch.StartNew(); LoadOrganization(); + Debug($"[STARTUP-TIMING] LoadOrganization: {startupSw.ElapsedMilliseconds}ms"); // Session restore runs in the background so the UI renders immediately. // With many sessions (40+), sequential ResumeSessionAsync calls can take @@ -1099,7 +1106,13 @@ public async Task InitializeAsync(CancellationToken cancellationToken = default) // Without Task.Run, the async continuations run on the UI thread. LoadHistoryFromDisk's // .GetAwaiter().GetResult() then blocks the UI thread waiting for async file I/O whose // continuation needs the UI thread → classic SyncContext deadlock → blue screen. - _ = Task.Run(() => RestoreSessionsInBackgroundAsync(cancellationToken)); + Debug($"[STARTUP-TIMING] Pre-restore: {startupSw.ElapsedMilliseconds}ms"); + _ = Task.Run(async () => + { + var restoreSw = System.Diagnostics.Stopwatch.StartNew(); + await RestoreSessionsInBackgroundAsync(cancellationToken); + Debug($"[STARTUP-TIMING] RestoreSessionsInBackground: {restoreSw.ElapsedMilliseconds}ms"); + }); // Initialize any registered providers (from DI / plugin loader) await InitializeProvidersAsync(cancellationToken);